drm: ati-3.19-rc1

git-svn-id: svn://kolibrios.org@5271 a494cfbc-eb01-0410-851d-a64ba20cac60
Sergey Semyonov (Serge) 2014-12-27 15:58:21 +00:00
parent 16bc56fa96
commit b4abefe936
121 changed files with 7563 additions and 2674 deletions

View File

@ -0,0 +1,715 @@
/*
* Copyright (C) 2014 Red Hat
* Copyright (C) 2014 Intel Corp.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
* Authors:
* Rob Clark <robdclark@gmail.com>
* Daniel Vetter <daniel.vetter@ffwll.ch>
*/
#include <drm/drmP.h>
#include <drm/drm_atomic.h>
#include <drm/drm_plane_helper.h>
static void kfree_state(struct drm_atomic_state *state)
{
kfree(state->connectors);
kfree(state->connector_states);
kfree(state->crtcs);
kfree(state->crtc_states);
kfree(state->planes);
kfree(state->plane_states);
kfree(state);
}
/**
* drm_atomic_state_alloc - allocate atomic state
* @dev: DRM device
*
* This allocates an empty atomic state to track updates.
*/
struct drm_atomic_state *
drm_atomic_state_alloc(struct drm_device *dev)
{
struct drm_atomic_state *state;
state = kzalloc(sizeof(*state), GFP_KERNEL);
if (!state)
return NULL;
state->num_connector = ACCESS_ONCE(dev->mode_config.num_connector);
state->crtcs = kcalloc(dev->mode_config.num_crtc,
sizeof(*state->crtcs), GFP_KERNEL);
if (!state->crtcs)
goto fail;
state->crtc_states = kcalloc(dev->mode_config.num_crtc,
sizeof(*state->crtc_states), GFP_KERNEL);
if (!state->crtc_states)
goto fail;
state->planes = kcalloc(dev->mode_config.num_total_plane,
sizeof(*state->planes), GFP_KERNEL);
if (!state->planes)
goto fail;
state->plane_states = kcalloc(dev->mode_config.num_total_plane,
sizeof(*state->plane_states), GFP_KERNEL);
if (!state->plane_states)
goto fail;
state->connectors = kcalloc(state->num_connector,
sizeof(*state->connectors),
GFP_KERNEL);
if (!state->connectors)
goto fail;
state->connector_states = kcalloc(state->num_connector,
sizeof(*state->connector_states),
GFP_KERNEL);
if (!state->connector_states)
goto fail;
state->dev = dev;
DRM_DEBUG_KMS("Allocate atomic state %p\n", state);
return state;
fail:
kfree_state(state);
return NULL;
}
EXPORT_SYMBOL(drm_atomic_state_alloc);
/**
* drm_atomic_state_clear - clear state object
* @state: atomic state
*
* When the w/w mutex algorithm detects a deadlock we need to back off and drop
* all locks. So someone else could sneak in and change the current modeset
* configuration, which means that all the state assembled in @state is no
* longer an atomic update to the current state, but to some arbitrary earlier
* state, which could break the assumptions the driver's ->atomic_check likely
* relies on.
*
* Hence we must clear all cached state and completely start over, using this
* function.
*/
void drm_atomic_state_clear(struct drm_atomic_state *state)
{
struct drm_device *dev = state->dev;
struct drm_mode_config *config = &dev->mode_config;
int i;
DRM_DEBUG_KMS("Clearing atomic state %p\n", state);
for (i = 0; i < state->num_connector; i++) {
struct drm_connector *connector = state->connectors[i];
if (!connector)
continue;
WARN_ON(!drm_modeset_is_locked(&config->connection_mutex));
connector->funcs->atomic_destroy_state(connector,
state->connector_states[i]);
}
for (i = 0; i < config->num_crtc; i++) {
struct drm_crtc *crtc = state->crtcs[i];
if (!crtc)
continue;
crtc->funcs->atomic_destroy_state(crtc,
state->crtc_states[i]);
}
for (i = 0; i < config->num_total_plane; i++) {
struct drm_plane *plane = state->planes[i];
if (!plane)
continue;
plane->funcs->atomic_destroy_state(plane,
state->plane_states[i]);
}
}
EXPORT_SYMBOL(drm_atomic_state_clear);
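Together with drm_atomic_state_alloc() and drm_atomic_state_free() this gives the usual deadlock/backoff dance. A minimal sketch of the pattern, assuming a hypothetical some_driver_build_update() that assembles the update and may return -EDEADLK:

static int example_atomic_update(struct drm_device *dev)
{
        struct drm_modeset_acquire_ctx ctx;
        struct drm_atomic_state *state;
        int ret;

        drm_modeset_acquire_init(&ctx, 0);

        state = drm_atomic_state_alloc(dev);
        if (!state) {
                ret = -ENOMEM;
                goto out;
        }
        state->acquire_ctx = &ctx;

retry:
        ret = some_driver_build_update(state);  /* hypothetical helper */
        if (ret == -EDEADLK) {
                drm_atomic_state_clear(state);  /* drop all cached object state */
                drm_modeset_backoff(&ctx);      /* unlock, wait, re-acquire */
                goto retry;
        }

        drm_atomic_state_free(state);
out:
        drm_modeset_acquire_fini(&ctx);
        return ret;
}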
/**
* drm_atomic_state_free - free all memory for an atomic state
* @state: atomic state to deallocate
*
* This frees all memory associated with an atomic state, including all the
* per-object state for planes, crtcs and connectors.
*/
void drm_atomic_state_free(struct drm_atomic_state *state)
{
drm_atomic_state_clear(state);
DRM_DEBUG_KMS("Freeing atomic state %p\n", state);
kfree_state(state);
}
EXPORT_SYMBOL(drm_atomic_state_free);
/**
* drm_atomic_get_crtc_state - get crtc state
* @state: global atomic state object
* @crtc: crtc to get state object for
*
* This function returns the crtc state for the given crtc, allocating it if
* needed. It will also grab the relevant crtc lock to make sure that the state
* is consistent.
*
* Returns:
*
* Either the allocated state or the error code encoded into the pointer. When
* the error is EDEADLK then the w/w mutex code has detected a deadlock and the
* entire atomic sequence must be restarted. All other errors are fatal.
*/
struct drm_crtc_state *
drm_atomic_get_crtc_state(struct drm_atomic_state *state,
struct drm_crtc *crtc)
{
int ret, index;
struct drm_crtc_state *crtc_state;
index = drm_crtc_index(crtc);
if (state->crtc_states[index])
return state->crtc_states[index];
ret = drm_modeset_lock(&crtc->mutex, state->acquire_ctx);
if (ret)
return ERR_PTR(ret);
crtc_state = crtc->funcs->atomic_duplicate_state(crtc);
if (!crtc_state)
return ERR_PTR(-ENOMEM);
state->crtc_states[index] = crtc_state;
state->crtcs[index] = crtc;
crtc_state->state = state;
DRM_DEBUG_KMS("Added [CRTC:%d] %p state to %p\n",
crtc->base.id, crtc_state, state);
return crtc_state;
}
EXPORT_SYMBOL(drm_atomic_get_crtc_state);
/**
* drm_atomic_get_plane_state - get plane state
* @state: global atomic state object
* @plane: plane to get state object for
*
* This function returns the plane state for the given plane, allocating it if
* needed. It will also grab the relevant plane lock to make sure that the state
* is consistent.
*
* Returns:
*
* Either the allocated state or the error code encoded into the pointer. When
* the error is EDEADLK then the w/w mutex code has detected a deadlock and the
* entire atomic sequence must be restarted. All other errors are fatal.
*/
struct drm_plane_state *
drm_atomic_get_plane_state(struct drm_atomic_state *state,
struct drm_plane *plane)
{
int ret, index;
struct drm_plane_state *plane_state;
index = drm_plane_index(plane);
if (state->plane_states[index])
return state->plane_states[index];
ret = drm_modeset_lock(&plane->mutex, state->acquire_ctx);
if (ret)
return ERR_PTR(ret);
plane_state = plane->funcs->atomic_duplicate_state(plane);
if (!plane_state)
return ERR_PTR(-ENOMEM);
state->plane_states[index] = plane_state;
state->planes[index] = plane;
plane_state->state = state;
DRM_DEBUG_KMS("Added [PLANE:%d] %p state to %p\n",
plane->base.id, plane_state, state);
if (plane_state->crtc) {
struct drm_crtc_state *crtc_state;
crtc_state = drm_atomic_get_crtc_state(state,
plane_state->crtc);
if (IS_ERR(crtc_state))
return ERR_CAST(crtc_state);
}
return plane_state;
}
EXPORT_SYMBOL(drm_atomic_get_plane_state);
/**
* drm_atomic_get_connector_state - get connector state
* @state: global atomic state object
* @connector: connector to get state object for
*
* This function returns the connector state for the given connector,
* allocating it if needed. It will also grab the relevant connector lock to
* make sure that the state is consistent.
*
* Returns:
*
* Either the allocated state or the error code encoded into the pointer. When
* the error is EDEADLK then the w/w mutex code has detected a deadlock and the
* entire atomic sequence must be restarted. All other errors are fatal.
*/
struct drm_connector_state *
drm_atomic_get_connector_state(struct drm_atomic_state *state,
struct drm_connector *connector)
{
int ret, index;
struct drm_mode_config *config = &connector->dev->mode_config;
struct drm_connector_state *connector_state;
ret = drm_modeset_lock(&config->connection_mutex, state->acquire_ctx);
if (ret)
return ERR_PTR(ret);
index = drm_connector_index(connector);
/*
* Construction of atomic state updates can race with a connector
* hot-add which might overflow. In this case flip the table and just
* restart the entire ioctl - no one is fast enough to livelock a cpu
* with physical hotplug events anyway.
*
* Note that we only grab the indexes once we have the right lock to
* prevent hotplug/unplugging of connectors. So removal is no problem,
* at most the array is a bit too large.
*/
if (index >= state->num_connector) {
DRM_DEBUG_KMS("Hot-added connector would overflow state array, restarting\n");
return ERR_PTR(-EAGAIN);
}
if (state->connector_states[index])
return state->connector_states[index];
connector_state = connector->funcs->atomic_duplicate_state(connector);
if (!connector_state)
return ERR_PTR(-ENOMEM);
state->connector_states[index] = connector_state;
state->connectors[index] = connector;
connector_state->state = state;
DRM_DEBUG_KMS("Added [CONNECTOR:%d] %p state to %p\n",
connector->base.id, connector_state, state);
if (connector_state->crtc) {
struct drm_crtc_state *crtc_state;
crtc_state = drm_atomic_get_crtc_state(state,
connector_state->crtc);
if (IS_ERR(crtc_state))
return ERR_CAST(crtc_state);
}
return connector_state;
}
EXPORT_SYMBOL(drm_atomic_get_connector_state);
/**
* drm_atomic_set_crtc_for_plane - set crtc for plane
* @state: the incoming atomic state
* @plane: the plane whose incoming state to update
* @crtc: crtc to use for the plane
*
* Changing the assigned crtc for a plane requires us to grab the lock and state
* for the new crtc, as needed. This function takes care of all these details
* besides updating the pointer in the state object itself.
*
* Returns:
* 0 on success or can fail with -EDEADLK or -ENOMEM. When the error is EDEADLK
* then the w/w mutex code has detected a deadlock and the entire atomic
* sequence must be restarted. All other errors are fatal.
*/
int
drm_atomic_set_crtc_for_plane(struct drm_atomic_state *state,
struct drm_plane *plane, struct drm_crtc *crtc)
{
struct drm_plane_state *plane_state =
drm_atomic_get_plane_state(state, plane);
struct drm_crtc_state *crtc_state;
if (WARN_ON(IS_ERR(plane_state)))
return PTR_ERR(plane_state);
if (plane_state->crtc) {
crtc_state = drm_atomic_get_crtc_state(plane_state->state,
plane_state->crtc);
if (WARN_ON(IS_ERR(crtc_state)))
return PTR_ERR(crtc_state);
crtc_state->plane_mask &= ~(1 << drm_plane_index(plane));
}
plane_state->crtc = crtc;
if (crtc) {
crtc_state = drm_atomic_get_crtc_state(plane_state->state,
crtc);
if (IS_ERR(crtc_state))
return PTR_ERR(crtc_state);
crtc_state->plane_mask |= (1 << drm_plane_index(plane));
}
if (crtc)
DRM_DEBUG_KMS("Link plane state %p to [CRTC:%d]\n",
plane_state, crtc->base.id);
else
DRM_DEBUG_KMS("Link plane state %p to [NOCRTC]\n", plane_state);
return 0;
}
EXPORT_SYMBOL(drm_atomic_set_crtc_for_plane);
/**
* drm_atomic_set_fb_for_plane - set framebuffer for plane
* @plane_state: atomic state object for the plane
* @fb: fb to use for the plane
*
* Changing the assigned framebuffer for a plane requires us to grab a reference
* to the new fb and drop the reference to the old fb, if there is one. This
* function takes care of all these details besides updating the pointer in the
* state object itself.
*/
void
drm_atomic_set_fb_for_plane(struct drm_plane_state *plane_state,
struct drm_framebuffer *fb)
{
if (plane_state->fb)
drm_framebuffer_unreference(plane_state->fb);
if (fb)
drm_framebuffer_reference(fb);
plane_state->fb = fb;
if (fb)
DRM_DEBUG_KMS("Set [FB:%d] for plane state %p\n",
fb->base.id, plane_state);
else
DRM_DEBUG_KMS("Set [NOFB] for plane state %p\n", plane_state);
}
EXPORT_SYMBOL(drm_atomic_set_fb_for_plane);
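A hedged sketch of how the two setters compose with drm_atomic_get_plane_state() into a complete plane update (the wrapper function is illustrative):

static int example_move_plane(struct drm_atomic_state *state,
                              struct drm_plane *plane,
                              struct drm_crtc *crtc,
                              struct drm_framebuffer *fb)
{
        struct drm_plane_state *plane_state;
        int ret;

        plane_state = drm_atomic_get_plane_state(state, plane);
        if (IS_ERR(plane_state))
                return PTR_ERR(plane_state);    /* -EDEADLK: restart the sequence */

        ret = drm_atomic_set_crtc_for_plane(state, plane, crtc);
        if (ret)
                return ret;

        /* takes/drops fb references as needed */
        drm_atomic_set_fb_for_plane(plane_state, fb);

        return 0;
}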
/**
* drm_atomic_set_crtc_for_connector - set crtc for connector
* @conn_state: atomic state object for the connector
* @crtc: crtc to use for the connector
*
* Changing the assigned crtc for a connector requires us to grab the lock and
* state for the new crtc, as needed. This function takes care of all these
* details besides updating the pointer in the state object itself.
*
* Returns:
* 0 on success or can fail with -EDEADLK or -ENOMEM. When the error is EDEADLK
* then the w/w mutex code has detected a deadlock and the entire atomic
* sequence must be restarted. All other errors are fatal.
*/
int
drm_atomic_set_crtc_for_connector(struct drm_connector_state *conn_state,
struct drm_crtc *crtc)
{
struct drm_crtc_state *crtc_state;
if (crtc) {
crtc_state = drm_atomic_get_crtc_state(conn_state->state, crtc);
if (IS_ERR(crtc_state))
return PTR_ERR(crtc_state);
}
conn_state->crtc = crtc;
if (crtc)
DRM_DEBUG_KMS("Link connector state %p to [CRTC:%d]\n",
conn_state, crtc->base.id);
else
DRM_DEBUG_KMS("Link connector state %p to [NOCRTC]\n",
conn_state);
return 0;
}
EXPORT_SYMBOL(drm_atomic_set_crtc_for_connector);
/**
* drm_atomic_add_affected_connectors - add connectors for crtc
* @state: atomic state
* @crtc: DRM crtc
*
* This function walks the current configuration and adds all connectors
* currently using @crtc to the atomic configuration @state. Note that this
* function must acquire the connection mutex. This can potentially cause
unneeded serialization if the update is just for the planes on one crtc. Hence
* drivers and helpers should only call this when really needed (e.g. when a
* full modeset needs to happen due to some change).
*
* Returns:
* 0 on success or can fail with -EDEADLK or -ENOMEM. When the error is EDEADLK
* then the w/w mutex code has detected a deadlock and the entire atomic
* sequence must be restarted. All other errors are fatal.
*/
int
drm_atomic_add_affected_connectors(struct drm_atomic_state *state,
struct drm_crtc *crtc)
{
struct drm_mode_config *config = &state->dev->mode_config;
struct drm_connector *connector;
struct drm_connector_state *conn_state;
int ret;
ret = drm_modeset_lock(&config->connection_mutex, state->acquire_ctx);
if (ret)
return ret;
DRM_DEBUG_KMS("Adding all current connectors for [CRTC:%d] to %p\n",
crtc->base.id, state);
/*
* Changed connectors are already in @state, so only need to look at the
* current configuration.
*/
list_for_each_entry(connector, &config->connector_list, head) {
if (connector->state->crtc != crtc)
continue;
conn_state = drm_atomic_get_connector_state(state, connector);
if (IS_ERR(conn_state))
return PTR_ERR(conn_state);
}
return 0;
}
EXPORT_SYMBOL(drm_atomic_add_affected_connectors);
/**
* drm_atomic_connectors_for_crtc - count number of connected outputs
* @state: atomic state
* @crtc: DRM crtc
*
* This function counts all connectors which will be connected to @crtc
* according to @state. Useful to recompute the enable state for @crtc.
*/
int
drm_atomic_connectors_for_crtc(struct drm_atomic_state *state,
struct drm_crtc *crtc)
{
int i, num_connected_connectors = 0;
for (i = 0; i < state->num_connector; i++) {
struct drm_connector_state *conn_state;
conn_state = state->connector_states[i];
if (conn_state && conn_state->crtc == crtc)
num_connected_connectors++;
}
DRM_DEBUG_KMS("State %p has %i connectors for [CRTC:%d]\n",
state, num_connected_connectors, crtc->base.id);
return num_connected_connectors;
}
EXPORT_SYMBOL(drm_atomic_connectors_for_crtc);
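For example, a driver's ->atomic_check could combine this with drm_atomic_add_affected_connectors() to validate the CRTC enable state; a sketch (the policy check is illustrative):

static int example_check_crtc(struct drm_atomic_state *state,
                              struct drm_crtc *crtc,
                              struct drm_crtc_state *crtc_state)
{
        int ret, num_connectors;

        /* pull every connector currently using this crtc into @state */
        ret = drm_atomic_add_affected_connectors(state, crtc);
        if (ret)
                return ret;

        /* then count what the new state routes to it */
        num_connectors = drm_atomic_connectors_for_crtc(state, crtc);
        if (crtc_state->enable && num_connectors == 0)
                return -EINVAL; /* enabled crtc without any output */

        return 0;
}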
/**
* drm_atomic_legacy_backoff - locking backoff for legacy ioctls
* @state: atomic state
*
* This function should be used by legacy entry points which don't understand
* -EDEADLK semantics. For simplicity this one will grab all modeset locks after
* the slowpath has completed.
*/
void drm_atomic_legacy_backoff(struct drm_atomic_state *state)
{
int ret;
retry:
drm_modeset_backoff(state->acquire_ctx);
ret = drm_modeset_lock(&state->dev->mode_config.connection_mutex,
state->acquire_ctx);
if (ret)
goto retry;
ret = drm_modeset_lock_all_crtcs(state->dev,
state->acquire_ctx);
if (ret)
goto retry;
}
EXPORT_SYMBOL(drm_atomic_legacy_backoff);
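In a legacy entry point the retry loop then looks roughly like this (sketch; the modeset body is a hypothetical stand-in):

static int example_legacy_set_config(struct drm_atomic_state *state)
{
        int ret;

retry:
        ret = example_legacy_modeset(state);    /* hypothetical legacy body */
        if (ret == -EDEADLK) {
                drm_atomic_state_clear(state);
                /* blocks until the contended locks are free, then grabs them all */
                drm_atomic_legacy_backoff(state);
                goto retry;
        }
        return ret;
}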
/**
* drm_atomic_check_only - check whether a given config would work
* @state: atomic configuration to check
*
* Note that this function can return -EDEADLK if the driver needed to acquire
* more locks but encountered a deadlock. The caller must then do the usual w/w
* backoff dance and restart. All other errors are fatal.
*
* Returns:
* 0 on success, negative error code on failure.
*/
int drm_atomic_check_only(struct drm_atomic_state *state)
{
struct drm_mode_config *config = &state->dev->mode_config;
DRM_DEBUG_KMS("checking %p\n", state);
if (config->funcs->atomic_check)
return config->funcs->atomic_check(state->dev, state);
else
return 0;
}
EXPORT_SYMBOL(drm_atomic_check_only);
/**
* drm_atomic_commit - commit configuration atomically
* @state: atomic configuration to check
*
* Note that this function can return -EDEADLK if the driver needed to acquire
* more locks but encountered a deadlock. The caller must then do the usual w/w
* backoff dance and restart. All other errors are fatal.
*
* Also note that on successful execution ownership of @state is transferred
* from the caller of this function to the function itself. The caller must not
* free or in any other way access @state. If the function fails then the caller
* must clean up @state itself.
*
* Returns:
* 0 on success, negative error code on failure.
*/
int drm_atomic_commit(struct drm_atomic_state *state)
{
struct drm_mode_config *config = &state->dev->mode_config;
int ret;
ret = drm_atomic_check_only(state);
if (ret)
return ret;
DRM_DEBUG_KMS("commiting %p\n", state);
return config->funcs->atomic_commit(state->dev, state, false);
}
EXPORT_SYMBOL(drm_atomic_commit);
/**
* drm_atomic_async_commit - atomic&async configuration commit
* @state: atomic configuration to check
*
* Note that this function can return -EDEADLK if the driver needed to acquire
* more locks but encountered a deadlock. The caller must then do the usual w/w
* backoff dance and restart. All other errors are fatal.
*
* Also note that on successful execution ownership of @state is transferred
* from the caller of this function to the function itself. The caller must not
* free or in any other way access @state. If the function fails then the caller
* must clean up @state itself.
*
* Returns:
* 0 on success, negative error code on failure.
*/
int drm_atomic_async_commit(struct drm_atomic_state *state)
{
struct drm_mode_config *config = &state->dev->mode_config;
int ret;
ret = drm_atomic_check_only(state);
if (ret)
return ret;
DRM_DEBUG_KMS("commiting %p asynchronously\n", state);
return config->funcs->atomic_commit(state->dev, state, true);
}
EXPORT_SYMBOL(drm_atomic_async_commit);
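Since ownership of @state transfers on success, cleanup after either commit variant is asymmetric; a minimal sketch:

        ret = drm_atomic_commit(state);
        if (ret != 0)
                drm_atomic_state_free(state);   /* failure: caller still owns @state */
        /* on success @state belongs to the commit machinery - hands off */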
/**
* drm_atomic_helper_plane_duplicate_state - default state duplicate hook
* @plane: drm plane
*
* Default plane state duplicate hook for drivers which don't have their own
* subclassed plane state structure.
*/
struct drm_plane_state *
drm_atomic_helper_plane_duplicate_state(struct drm_plane *plane)
{
struct drm_plane_state *state;
if (WARN_ON(!plane->state))
return NULL;
state = kmemdup(plane->state, sizeof(*plane->state), GFP_KERNEL);
if (state && state->fb)
drm_framebuffer_reference(state->fb);
return state;
}
EXPORT_SYMBOL(drm_atomic_helper_plane_duplicate_state);
/**
* drm_atomic_helper_crtc_destroy_state - default state destroy hook
* @crtc: drm CRTC
* @state: CRTC state object to release
*
* Default CRTC state destroy hook for drivers which don't have their own
* subclassed CRTC state structure.
*/
void drm_atomic_helper_crtc_destroy_state(struct drm_crtc *crtc,
struct drm_crtc_state *state)
{
kfree(state);
}
EXPORT_SYMBOL(drm_atomic_helper_crtc_destroy_state);
/**
* drm_atomic_helper_plane_destroy_state - default state destroy hook
* @plane: drm plane
* @state: plane state object to release
*
* Default plane state destroy hook for drivers which don't have their own
* subclassed plane state structure.
*/
void drm_atomic_helper_plane_destroy_state(struct drm_plane *plane,
struct drm_plane_state *state)
{
if (state->fb)
drm_framebuffer_unreference(state->fb);
kfree(state);
}
EXPORT_SYMBOL(drm_atomic_helper_plane_destroy_state);

View File

@ -29,15 +29,10 @@
*/
#include <linux/export.h>
#include <linux/scatterlist.h>
#include <drm/drmP.h>
extern int x86_clflush_size;
static inline void clflush(volatile void *__p)
{
asm volatile("clflush %0" : "+m" (*(volatile char*)__p));
}
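For reference, a hedged sketch of flushing an entire buffer with this primitive, stepping by the detected cache-line size (the helper name is illustrative):

static void example_clflush_range(void *addr, unsigned long length)
{
        unsigned long start = (unsigned long)addr & ~(unsigned long)(x86_clflush_size - 1);
        unsigned long end = (unsigned long)addr + length;

        asm volatile("mfence" ::: "memory");    /* order earlier stores */
        for (; start < end; start += x86_clflush_size)
                clflush((void *)start);
        asm volatile("mfence" ::: "memory");    /* wait for the flushes */
}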
#if 0
static void

File diff suppressed because it is too large

View File

@ -34,12 +34,35 @@
#include <linux/moduleparam.h>
#include <drm/drmP.h>
#include <drm/drm_atomic.h>
#include <drm/drm_crtc.h>
#include <drm/drm_fourcc.h>
#include <drm/drm_crtc_helper.h>
#include <drm/drm_fb_helper.h>
#include <drm/drm_plane_helper.h>
#include <drm/drm_atomic_helper.h>
#include <drm/drm_edid.h>
/**
* DOC: overview
*
* The CRTC modeset helper library provides a default set_config implementation
* in drm_crtc_helper_set_config(). Plus a few other convenience functions using
* the same callbacks which drivers can use to e.g. restore the modeset
* configuration on resume with drm_helper_resume_force_mode().
*
* The driver callbacks are mostly compatible with the atomic modeset helpers,
* except for the handling of the primary plane: Atomic helpers require that the
* primary plane is implemented as a real standalone plane and not directly tied
* to the CRTC state. For easier transition this library provides functions to
* implement the old semantics required by the CRTC helpers using the new plane
* and atomic helper callbacks.
*
* Drivers are strongly urged to convert to the atomic helpers (by way of first
* converting to the plane helpers). New drivers must not use these functions
* but need to implement the atomic interface instead, potentially using the
* atomic helpers for that.
*/
MODULE_AUTHOR("David Airlie, Jesse Barnes");
MODULE_DESCRIPTION("DRM KMS helper");
MODULE_LICENSE("GPL and additional rights");
@ -871,3 +894,112 @@ void drm_helper_resume_force_mode(struct drm_device *dev)
drm_modeset_unlock_all(dev);
}
EXPORT_SYMBOL(drm_helper_resume_force_mode);
/**
* drm_helper_crtc_mode_set - mode_set implementation for atomic plane helpers
* @crtc: DRM CRTC
* @mode: DRM display mode which userspace requested
* @adjusted_mode: DRM display mode adjusted by ->mode_fixup callbacks
* @x: x offset of the CRTC scanout area on the underlying framebuffer
* @y: y offset of the CRTC scanout area on the underlying framebuffer
* @old_fb: previous framebuffer
*
* This function implements a callback usable as the ->mode_set callback
* required by the crtc helpers. Besides the atomic plane helper functions for
* the primary plane the driver must also provide the ->mode_set_nofb callback
* to set up the crtc.
*
* This is a transitional helper useful for converting drivers to the atomic
* interfaces.
*/
int drm_helper_crtc_mode_set(struct drm_crtc *crtc, struct drm_display_mode *mode,
struct drm_display_mode *adjusted_mode, int x, int y,
struct drm_framebuffer *old_fb)
{
struct drm_crtc_state *crtc_state;
struct drm_crtc_helper_funcs *crtc_funcs = crtc->helper_private;
int ret;
if (crtc->funcs->atomic_duplicate_state)
crtc_state = crtc->funcs->atomic_duplicate_state(crtc);
else if (crtc->state)
crtc_state = kmemdup(crtc->state, sizeof(*crtc_state),
GFP_KERNEL);
else
crtc_state = kzalloc(sizeof(*crtc_state), GFP_KERNEL);
if (!crtc_state)
return -ENOMEM;
crtc_state->enable = true;
crtc_state->planes_changed = true;
crtc_state->mode_changed = true;
drm_mode_copy(&crtc_state->mode, mode);
drm_mode_copy(&crtc_state->adjusted_mode, adjusted_mode);
if (crtc_funcs->atomic_check) {
ret = crtc_funcs->atomic_check(crtc, crtc_state);
if (ret) {
kfree(crtc_state);
return ret;
}
}
swap(crtc->state, crtc_state);
crtc_funcs->mode_set_nofb(crtc);
if (crtc_state) {
if (crtc->funcs->atomic_destroy_state)
crtc->funcs->atomic_destroy_state(crtc, crtc_state);
else
kfree(crtc_state);
}
return drm_helper_crtc_mode_set_base(crtc, x, y, old_fb);
}
EXPORT_SYMBOL(drm_helper_crtc_mode_set);
/**
* drm_helper_crtc_mode_set_base - mode_set_base implementation for atomic plane helpers
* @crtc: DRM CRTC
* @x: x offset of the CRTC scanout area on the underlying framebuffer
* @y: y offset of the CRTC scanout area on the underlying framebuffer
* @old_fb: previous framebuffer
*
* This function implements a callback usable as the ->mode_set_base callback
* required by the crtc helpers. The driver must provide the atomic plane helper
* functions for the primary plane.
*
* This is a transitional helper useful for converting drivers to the atomic
* interfaces.
*/
int drm_helper_crtc_mode_set_base(struct drm_crtc *crtc, int x, int y,
struct drm_framebuffer *old_fb)
{
struct drm_plane_state *plane_state;
struct drm_plane *plane = crtc->primary;
if (plane->funcs->atomic_duplicate_state)
plane_state = plane->funcs->atomic_duplicate_state(plane);
else if (plane->state)
plane_state = drm_atomic_helper_plane_duplicate_state(plane);
else
plane_state = kzalloc(sizeof(*plane_state), GFP_KERNEL);
if (!plane_state)
return -ENOMEM;
plane_state->crtc = crtc;
drm_atomic_set_fb_for_plane(plane_state, crtc->primary->fb);
plane_state->crtc_x = 0;
plane_state->crtc_y = 0;
plane_state->crtc_h = crtc->mode.vdisplay;
plane_state->crtc_w = crtc->mode.hdisplay;
plane_state->src_x = x << 16;
plane_state->src_y = y << 16;
plane_state->src_h = crtc->mode.vdisplay << 16;
plane_state->src_w = crtc->mode.hdisplay << 16;
return drm_plane_helper_commit(plane, plane_state, old_fb);
}
EXPORT_SYMBOL(drm_helper_crtc_mode_set_base);
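Wiring both transitional helpers into a driver then looks roughly like this (sketch; the example_* callbacks are assumed to be provided by the driver):

static const struct drm_crtc_helper_funcs example_crtc_helper_funcs = {
        /* driver-provided pieces */
        .mode_fixup     = example_crtc_mode_fixup,
        .mode_set_nofb  = example_crtc_mode_set_nofb,
        /* transitional glue from this file */
        .mode_set       = drm_helper_crtc_mode_set,
        .mode_set_base  = drm_helper_crtc_mode_set_base,
};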

View File

@ -27,8 +27,8 @@
#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/i2c.h>
#include <drm/drmP.h>
#include <drm/drm_dp_helper.h>
#include <drm/drmP.h>
/**
* DOC: dp helpers
@ -39,198 +39,6 @@
* blocks, ...
*/
/* Run a single AUX_CH I2C transaction, writing/reading data as necessary */
static int
i2c_algo_dp_aux_transaction(struct i2c_adapter *adapter, int mode,
uint8_t write_byte, uint8_t *read_byte)
{
struct i2c_algo_dp_aux_data *algo_data = adapter->algo_data;
int ret;
ret = (*algo_data->aux_ch)(adapter, mode,
write_byte, read_byte);
return ret;
}
/*
* I2C over AUX CH
*/
/*
* Send the address. If the I2C link is running, this 'restarts'
* the connection with the new address, this is used for doing
* a write followed by a read (as needed for DDC)
*/
static int
i2c_algo_dp_aux_address(struct i2c_adapter *adapter, u16 address, bool reading)
{
struct i2c_algo_dp_aux_data *algo_data = adapter->algo_data;
int mode = MODE_I2C_START;
int ret;
if (reading)
mode |= MODE_I2C_READ;
else
mode |= MODE_I2C_WRITE;
algo_data->address = address;
algo_data->running = true;
ret = i2c_algo_dp_aux_transaction(adapter, mode, 0, NULL);
return ret;
}
/*
* Stop the I2C transaction. This closes out the link, sending
* a bare address packet with the MOT bit turned off
*/
static void
i2c_algo_dp_aux_stop(struct i2c_adapter *adapter, bool reading)
{
struct i2c_algo_dp_aux_data *algo_data = adapter->algo_data;
int mode = MODE_I2C_STOP;
if (reading)
mode |= MODE_I2C_READ;
else
mode |= MODE_I2C_WRITE;
if (algo_data->running) {
(void) i2c_algo_dp_aux_transaction(adapter, mode, 0, NULL);
algo_data->running = false;
}
}
/*
* Write a single byte to the current I2C address; the
* I2C link must be running or this returns -EIO
*/
static int
i2c_algo_dp_aux_put_byte(struct i2c_adapter *adapter, u8 byte)
{
struct i2c_algo_dp_aux_data *algo_data = adapter->algo_data;
int ret;
if (!algo_data->running)
return -EIO;
ret = i2c_algo_dp_aux_transaction(adapter, MODE_I2C_WRITE, byte, NULL);
return ret;
}
/*
* Read a single byte from the current I2C address, the
* I2C link must be running or this returns -EIO
*/
static int
i2c_algo_dp_aux_get_byte(struct i2c_adapter *adapter, u8 *byte_ret)
{
struct i2c_algo_dp_aux_data *algo_data = adapter->algo_data;
int ret;
if (!algo_data->running)
return -EIO;
ret = i2c_algo_dp_aux_transaction(adapter, MODE_I2C_READ, 0, byte_ret);
return ret;
}
static int
i2c_algo_dp_aux_xfer(struct i2c_adapter *adapter,
struct i2c_msg *msgs,
int num)
{
int ret = 0;
bool reading = false;
int m;
int b;
for (m = 0; m < num; m++) {
u16 len = msgs[m].len;
u8 *buf = msgs[m].buf;
reading = (msgs[m].flags & I2C_M_RD) != 0;
ret = i2c_algo_dp_aux_address(adapter, msgs[m].addr, reading);
if (ret < 0)
break;
if (reading) {
for (b = 0; b < len; b++) {
ret = i2c_algo_dp_aux_get_byte(adapter, &buf[b]);
if (ret < 0)
break;
}
} else {
for (b = 0; b < len; b++) {
ret = i2c_algo_dp_aux_put_byte(adapter, buf[b]);
if (ret < 0)
break;
}
}
if (ret < 0)
break;
}
if (ret >= 0)
ret = num;
i2c_algo_dp_aux_stop(adapter, reading);
DRM_DEBUG_KMS("dp_aux_xfer return %d\n", ret);
return ret;
}
static u32
i2c_algo_dp_aux_functionality(struct i2c_adapter *adapter)
{
return I2C_FUNC_I2C | I2C_FUNC_SMBUS_EMUL |
I2C_FUNC_SMBUS_READ_BLOCK_DATA |
I2C_FUNC_SMBUS_BLOCK_PROC_CALL |
I2C_FUNC_10BIT_ADDR;
}
static const struct i2c_algorithm i2c_dp_aux_algo = {
.master_xfer = i2c_algo_dp_aux_xfer,
.functionality = i2c_algo_dp_aux_functionality,
};
static void
i2c_dp_aux_reset_bus(struct i2c_adapter *adapter)
{
(void) i2c_algo_dp_aux_address(adapter, 0, false);
(void) i2c_algo_dp_aux_stop(adapter, false);
}
static int
i2c_dp_aux_prepare_bus(struct i2c_adapter *adapter)
{
adapter->algo = &i2c_dp_aux_algo;
adapter->retries = 3;
i2c_dp_aux_reset_bus(adapter);
return 0;
}
/**
* i2c_dp_aux_add_bus() - register an i2c adapter using the aux ch helper
* @adapter: i2c adapter to register
*
* This registers an i2c adapter that uses the dp aux channel as its underlying
* transport. The driver needs to fill out the &i2c_algo_dp_aux_data structure
* and store it in the algo_data member of the @adapter argument. This will be
* used by the i2c over dp aux algorithm to drive the hardware.
*
* RETURNS:
* 0 on success, -ERRNO on failure.
*
* IMPORTANT:
* This interface is deprecated, please switch to the new dp aux helpers and
* drm_dp_aux_register().
*/
int
i2c_dp_aux_add_bus(struct i2c_adapter *adapter)
{
int error;
error = i2c_dp_aux_prepare_bus(adapter);
if (error)
return error;
error = i2c_add_adapter(adapter);
return error;
}
EXPORT_SYMBOL(i2c_dp_aux_add_bus);
/* Helpers for DP link training */
static u8 dp_link_status(const u8 link_status[DP_LINK_STATUS_SIZE], int r)
{
@ -378,10 +186,11 @@ static int drm_dp_dpcd_access(struct drm_dp_aux *aux, u8 request,
/*
* The specification doesn't give any recommendation on how often to
* retry native transactions, so retry 7 times like for I2C-over-AUX
* transactions.
* retry native transactions. We used to retry 7 times like for
* aux i2c transactions, but for real world devices this wasn't
* sufficient; bump to 32, which makes Dell 4k monitors happier.
*/
for (retry = 0; retry < 7; retry++) {
for (retry = 0; retry < 32; retry++) {
mutex_lock(&aux->hw_mutex);
err = aux->transfer(aux, &msg);

View File

@ -689,7 +689,7 @@ static int build_allocate_payload(struct drm_dp_sideband_msg_tx *msg, int port_n
static int drm_dp_mst_assign_payload_id(struct drm_dp_mst_topology_mgr *mgr,
struct drm_dp_vcpi *vcpi)
{
int ret;
int ret, vcpi_ret;
mutex_lock(&mgr->payload_lock);
ret = find_first_zero_bit(&mgr->payload_mask, mgr->max_payloads + 1);
@ -699,8 +699,16 @@ static int drm_dp_mst_assign_payload_id(struct drm_dp_mst_topology_mgr *mgr,
goto out_unlock;
}
vcpi_ret = find_first_zero_bit(&mgr->vcpi_mask, mgr->max_payloads + 1);
if (vcpi_ret > mgr->max_payloads) {
ret = -EINVAL;
DRM_DEBUG_KMS("out of vcpi ids %d\n", ret);
goto out_unlock;
}
set_bit(ret, &mgr->payload_mask);
vcpi->vcpi = ret;
set_bit(vcpi_ret, &mgr->vcpi_mask);
vcpi->vcpi = vcpi_ret + 1;
mgr->proposed_vcpis[ret - 1] = vcpi;
out_unlock:
mutex_unlock(&mgr->payload_lock);
@ -708,15 +716,23 @@ out_unlock:
}
static void drm_dp_mst_put_payload_id(struct drm_dp_mst_topology_mgr *mgr,
int id)
int vcpi)
{
if (id == 0)
int i;
if (vcpi == 0)
return;
mutex_lock(&mgr->payload_lock);
DRM_DEBUG_KMS("putting payload %d\n", id);
clear_bit(id, &mgr->payload_mask);
mgr->proposed_vcpis[id - 1] = NULL;
DRM_DEBUG_KMS("putting payload %d\n", vcpi);
clear_bit(vcpi - 1, &mgr->vcpi_mask);
for (i = 0; i < mgr->max_payloads; i++) {
if (mgr->proposed_vcpis[i])
if (mgr->proposed_vcpis[i]->vcpi == vcpi) {
mgr->proposed_vcpis[i] = NULL;
clear_bit(i + 1, &mgr->payload_mask);
}
}
mutex_unlock(&mgr->payload_lock);
}
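A hedged note on the bookkeeping above, since two index spaces are now in play:

/*
 * Simplified model (assuming max_payloads = 4):
 *
 *   payload_mask: bit 0 is reserved at manager init, so bits 1..4 track
 *                 rows of the payload table;
 *   vcpi_mask:    bit i maps to VCPI id i + 1 (VCPI 0 means "unallocated").
 *
 * Keeping the two masks separate lets a port hold a stable VCPI id while
 * its payload row moves when earlier payloads are deleted.
 */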
@ -830,6 +846,8 @@ static void drm_dp_put_mst_branch_device(struct drm_dp_mst_branch *mstb)
static void drm_dp_port_teardown_pdt(struct drm_dp_mst_port *port, int old_pdt)
{
struct drm_dp_mst_branch *mstb;
switch (old_pdt) {
case DP_PEER_DEVICE_DP_LEGACY_CONV:
case DP_PEER_DEVICE_SST_SINK:
@ -837,8 +855,9 @@ static void drm_dp_port_teardown_pdt(struct drm_dp_mst_port *port, int old_pdt)
drm_dp_mst_unregister_i2c_bus(&port->aux);
break;
case DP_PEER_DEVICE_MST_BRANCHING:
drm_dp_put_mst_branch_device(port->mstb);
mstb = port->mstb;
port->mstb = NULL;
drm_dp_put_mst_branch_device(mstb);
break;
}
}
@ -849,6 +868,8 @@ static void drm_dp_destroy_port(struct kref *kref)
struct drm_dp_mst_topology_mgr *mgr = port->mgr;
if (!port->input) {
port->vcpi.num_slots = 0;
kfree(port->cached_edid);
if (port->connector)
(*port->mgr->cbs->destroy_connector)(mgr, port->connector);
drm_dp_port_teardown_pdt(port, port->pdt);
@ -1002,19 +1023,20 @@ static void drm_dp_check_port_guid(struct drm_dp_mst_branch *mstb,
static void build_mst_prop_path(struct drm_dp_mst_port *port,
struct drm_dp_mst_branch *mstb,
char *proppath)
char *proppath,
size_t proppath_size)
{
int i;
char temp[8];
snprintf(proppath, 255, "mst:%d", mstb->mgr->conn_base_id);
snprintf(proppath, proppath_size, "mst:%d", mstb->mgr->conn_base_id);
for (i = 0; i < (mstb->lct - 1); i++) {
int shift = (i % 2) ? 0 : 4;
int port_num = mstb->rad[i / 2] >> shift;
snprintf(temp, 8, "-%d", port_num);
strncat(proppath, temp, 255);
snprintf(temp, sizeof(temp), "-%d", port_num);
strlcat(proppath, temp, proppath_size);
}
snprintf(temp, 8, "-%d", port->port_num);
strncat(proppath, temp, 255);
snprintf(temp, sizeof(temp), "-%d", port->port_num);
strlcat(proppath, temp, proppath_size);
}
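For a concrete (hypothetical) topology the bounded variant produces paths like this:

/*
 * Example with made-up values: conn_base_id = 4, a branch reached via
 * ports 1 and 3, and port->port_num = 8 yields:
 *
 *   proppath = "mst:4-1-3-8"
 *
 * i.e. the base connector id followed by one "-<port>" per hop; the
 * snprintf()/strlcat() pair keeps the string within proppath_size.
 */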
static void drm_dp_add_port(struct drm_dp_mst_branch *mstb,
@ -1085,8 +1107,12 @@ static void drm_dp_add_port(struct drm_dp_mst_branch *mstb,
if (created && !port->input) {
char proppath[255];
build_mst_prop_path(port, mstb, proppath);
build_mst_prop_path(port, mstb, proppath, sizeof(proppath));
port->connector = (*mstb->mgr->cbs->add_connector)(mstb->mgr, port, proppath);
if (port->port_num >= 8) {
port->cached_edid = drm_get_edid(port->connector, &port->aux.ddc);
}
}
/* put reference to this port */
@ -1570,7 +1596,7 @@ static int drm_dp_destroy_payload_step1(struct drm_dp_mst_topology_mgr *mgr,
}
drm_dp_dpcd_write_payload(mgr, id, payload);
payload->payload_state = 0;
payload->payload_state = DP_PAYLOAD_DELETE_LOCAL;
return 0;
}
@ -1597,7 +1623,7 @@ static int drm_dp_destroy_payload_step2(struct drm_dp_mst_topology_mgr *mgr,
*/
int drm_dp_update_payload_part1(struct drm_dp_mst_topology_mgr *mgr)
{
int i;
int i, j;
int cur_slots = 1;
struct drm_dp_payload req_payload;
struct drm_dp_mst_port *port;
@ -1614,26 +1640,46 @@ int drm_dp_update_payload_part1(struct drm_dp_mst_topology_mgr *mgr)
port = NULL;
req_payload.num_slots = 0;
}
if (mgr->payloads[i].start_slot != req_payload.start_slot) {
mgr->payloads[i].start_slot = req_payload.start_slot;
}
/* work out what is required to happen with this payload */
if (mgr->payloads[i].start_slot != req_payload.start_slot ||
mgr->payloads[i].num_slots != req_payload.num_slots) {
if (mgr->payloads[i].num_slots != req_payload.num_slots) {
/* need to push an update for this payload */
if (req_payload.num_slots) {
drm_dp_create_payload_step1(mgr, i + 1, &req_payload);
drm_dp_create_payload_step1(mgr, mgr->proposed_vcpis[i]->vcpi, &req_payload);
mgr->payloads[i].num_slots = req_payload.num_slots;
} else if (mgr->payloads[i].num_slots) {
mgr->payloads[i].num_slots = 0;
drm_dp_destroy_payload_step1(mgr, port, i + 1, &mgr->payloads[i]);
drm_dp_destroy_payload_step1(mgr, port, port->vcpi.vcpi, &mgr->payloads[i]);
req_payload.payload_state = mgr->payloads[i].payload_state;
} else
req_payload.payload_state = 0;
mgr->payloads[i].start_slot = req_payload.start_slot;
mgr->payloads[i].start_slot = 0;
}
mgr->payloads[i].payload_state = req_payload.payload_state;
}
cur_slots += req_payload.num_slots;
}
for (i = 0; i < mgr->max_payloads; i++) {
if (mgr->payloads[i].payload_state == DP_PAYLOAD_DELETE_LOCAL) {
DRM_DEBUG_KMS("removing payload %d\n", i);
for (j = i; j < mgr->max_payloads - 1; j++) {
memcpy(&mgr->payloads[j], &mgr->payloads[j + 1], sizeof(struct drm_dp_payload));
mgr->proposed_vcpis[j] = mgr->proposed_vcpis[j + 1];
if (mgr->proposed_vcpis[j] && mgr->proposed_vcpis[j]->num_slots) {
set_bit(j + 1, &mgr->payload_mask);
} else {
clear_bit(j + 1, &mgr->payload_mask);
}
}
memset(&mgr->payloads[mgr->max_payloads - 1], 0, sizeof(struct drm_dp_payload));
mgr->proposed_vcpis[mgr->max_payloads - 1] = NULL;
clear_bit(mgr->max_payloads, &mgr->payload_mask);
}
}
mutex_unlock(&mgr->payload_lock);
return 0;
@ -1664,9 +1710,9 @@ int drm_dp_update_payload_part2(struct drm_dp_mst_topology_mgr *mgr)
DRM_DEBUG_KMS("payload %d %d\n", i, mgr->payloads[i].payload_state);
if (mgr->payloads[i].payload_state == DP_PAYLOAD_LOCAL) {
ret = drm_dp_create_payload_step2(mgr, port, i + 1, &mgr->payloads[i]);
ret = drm_dp_create_payload_step2(mgr, port, mgr->proposed_vcpis[i]->vcpi, &mgr->payloads[i]);
} else if (mgr->payloads[i].payload_state == DP_PAYLOAD_DELETE_LOCAL) {
ret = drm_dp_destroy_payload_step2(mgr, i + 1, &mgr->payloads[i]);
ret = drm_dp_destroy_payload_step2(mgr, mgr->proposed_vcpis[i]->vcpi, &mgr->payloads[i]);
}
if (ret) {
mutex_unlock(&mgr->payload_lock);
@ -1769,17 +1815,27 @@ static int drm_dp_send_up_ack_reply(struct drm_dp_mst_topology_mgr *mgr,
return 0;
}
static int drm_dp_get_vc_payload_bw(int dp_link_bw, int dp_link_count)
static bool drm_dp_get_vc_payload_bw(int dp_link_bw,
int dp_link_count,
int *out)
{
switch (dp_link_bw) {
default:
DRM_DEBUG_KMS("invalid link bandwidth in DPCD: %x (link count: %d)\n",
dp_link_bw, dp_link_count);
return false;
case DP_LINK_BW_1_62:
return 3 * dp_link_count;
*out = 3 * dp_link_count;
break;
case DP_LINK_BW_2_7:
return 5 * dp_link_count;
*out = 5 * dp_link_count;
break;
case DP_LINK_BW_5_4:
return 10 * dp_link_count;
*out = 10 * dp_link_count;
break;
}
return 0;
return true;
}
/**
@ -1811,7 +1867,13 @@ int drm_dp_mst_topology_mgr_set_mst(struct drm_dp_mst_topology_mgr *mgr, bool ms
goto out_unlock;
}
mgr->pbn_div = drm_dp_get_vc_payload_bw(mgr->dpcd[1], mgr->dpcd[2] & DP_MAX_LANE_COUNT_MASK);
if (!drm_dp_get_vc_payload_bw(mgr->dpcd[1],
mgr->dpcd[2] & DP_MAX_LANE_COUNT_MASK,
&mgr->pbn_div)) {
ret = -EINVAL;
goto out_unlock;
}
mgr->total_pbn = 2560;
mgr->total_slots = DIV_ROUND_UP(mgr->total_pbn, mgr->pbn_div);
mgr->avail_slots = mgr->total_slots;
@ -1868,6 +1930,7 @@ int drm_dp_mst_topology_mgr_set_mst(struct drm_dp_mst_topology_mgr *mgr, bool ms
memset(mgr->payloads, 0, mgr->max_payloads * sizeof(struct drm_dp_payload));
mgr->payload_mask = 0;
set_bit(0, &mgr->payload_mask);
mgr->vcpi_mask = 0;
}
out_unlock:
@ -2078,6 +2141,7 @@ static int drm_dp_mst_handle_up_req(struct drm_dp_mst_topology_mgr *mgr)
* drm_dp_mst_hpd_irq() - MST hotplug IRQ notify
* @mgr: manager to notify irq for.
* @esi: 4 bytes from SINK_COUNT_ESI
* @handled: whether the hpd interrupt was consumed or not
*
* This should be called from the driver when it detects a short IRQ,
* along with the value of the DEVICE_SERVICE_IRQ_VECTOR_ESI0. The
@ -2119,7 +2183,8 @@ EXPORT_SYMBOL(drm_dp_mst_hpd_irq);
* This returns the current connection state for a port. It validates the
* port pointer still exists so the caller doesn't require a reference
*/
enum drm_connector_status drm_dp_mst_detect_port(struct drm_dp_mst_topology_mgr *mgr, struct drm_dp_mst_port *port)
enum drm_connector_status drm_dp_mst_detect_port(struct drm_connector *connector,
struct drm_dp_mst_topology_mgr *mgr, struct drm_dp_mst_port *port)
{
enum drm_connector_status status = connector_status_disconnected;
@ -2138,6 +2203,10 @@ enum drm_connector_status drm_dp_mst_detect_port(struct drm_dp_mst_topology_mgr
case DP_PEER_DEVICE_SST_SINK:
status = connector_status_connected;
/* for logical ports - cache the EDID */
if (port->port_num >= 8 && !port->cached_edid) {
port->cached_edid = drm_get_edid(connector, &port->aux.ddc);
}
break;
case DP_PEER_DEVICE_DP_LEGACY_CONV:
if (port->ldps)
@ -2169,7 +2238,12 @@ struct edid *drm_dp_mst_get_edid(struct drm_connector *connector, struct drm_dp_
if (!port)
return NULL;
if (port->cached_edid)
edid = drm_edid_duplicate(port->cached_edid);
else
edid = drm_get_edid(connector, &port->aux.ddc);
drm_mode_connector_set_tile_property(connector);
drm_dp_put_port(port);
return edid;
}

View File

@ -34,6 +34,7 @@
#include <linux/module.h>
#include <drm/drmP.h>
#include <drm/drm_edid.h>
#include <drm/drm_displayid.h>
#define version_greater(edid, maj, min) \
(((edid)->version > (maj)) || \
@ -632,27 +633,27 @@ static const struct drm_display_mode edid_cea_modes[] = {
DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_PVSYNC |
DRM_MODE_FLAG_INTERLACE),
.vrefresh = 60, .picture_aspect_ratio = HDMI_PICTURE_ASPECT_16_9, },
/* 6 - 1440x480i@60Hz */
{ DRM_MODE("1440x480i", DRM_MODE_TYPE_DRIVER, 27000, 1440, 1478,
1602, 1716, 0, 480, 488, 494, 525, 0,
/* 6 - 720(1440)x480i@60Hz */
{ DRM_MODE("720x480i", DRM_MODE_TYPE_DRIVER, 13500, 720, 739,
801, 858, 0, 480, 488, 494, 525, 0,
DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_NVSYNC |
DRM_MODE_FLAG_INTERLACE | DRM_MODE_FLAG_DBLCLK),
.vrefresh = 60, .picture_aspect_ratio = HDMI_PICTURE_ASPECT_4_3, },
/* 7 - 1440x480i@60Hz */
{ DRM_MODE("1440x480i", DRM_MODE_TYPE_DRIVER, 27000, 1440, 1478,
1602, 1716, 0, 480, 488, 494, 525, 0,
/* 7 - 720(1440)x480i@60Hz */
{ DRM_MODE("720x480i", DRM_MODE_TYPE_DRIVER, 13500, 720, 739,
801, 858, 0, 480, 488, 494, 525, 0,
DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_NVSYNC |
DRM_MODE_FLAG_INTERLACE | DRM_MODE_FLAG_DBLCLK),
.vrefresh = 60, .picture_aspect_ratio = HDMI_PICTURE_ASPECT_16_9, },
/* 8 - 1440x240@60Hz */
{ DRM_MODE("1440x240", DRM_MODE_TYPE_DRIVER, 27000, 1440, 1478,
1602, 1716, 0, 240, 244, 247, 262, 0,
/* 8 - 720(1440)x240@60Hz */
{ DRM_MODE("720x240", DRM_MODE_TYPE_DRIVER, 13500, 720, 739,
801, 858, 0, 240, 244, 247, 262, 0,
DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_NVSYNC |
DRM_MODE_FLAG_DBLCLK),
.vrefresh = 60, .picture_aspect_ratio = HDMI_PICTURE_ASPECT_4_3, },
/* 9 - 1440x240@60Hz */
{ DRM_MODE("1440x240", DRM_MODE_TYPE_DRIVER, 27000, 1440, 1478,
1602, 1716, 0, 240, 244, 247, 262, 0,
/* 9 - 720(1440)x240@60Hz */
{ DRM_MODE("720x240", DRM_MODE_TYPE_DRIVER, 13500, 720, 739,
801, 858, 0, 240, 244, 247, 262, 0,
DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_NVSYNC |
DRM_MODE_FLAG_DBLCLK),
.vrefresh = 60, .picture_aspect_ratio = HDMI_PICTURE_ASPECT_16_9, },
@ -714,27 +715,27 @@ static const struct drm_display_mode edid_cea_modes[] = {
DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_PVSYNC |
DRM_MODE_FLAG_INTERLACE),
.vrefresh = 50, .picture_aspect_ratio = HDMI_PICTURE_ASPECT_16_9, },
/* 21 - 1440x576i@50Hz */
{ DRM_MODE("1440x576i", DRM_MODE_TYPE_DRIVER, 27000, 1440, 1464,
1590, 1728, 0, 576, 580, 586, 625, 0,
/* 21 - 720(1440)x576i@50Hz */
{ DRM_MODE("720x576i", DRM_MODE_TYPE_DRIVER, 13500, 720, 732,
795, 864, 0, 576, 580, 586, 625, 0,
DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_NVSYNC |
DRM_MODE_FLAG_INTERLACE | DRM_MODE_FLAG_DBLCLK),
.vrefresh = 50, .picture_aspect_ratio = HDMI_PICTURE_ASPECT_4_3, },
/* 22 - 1440x576i@50Hz */
{ DRM_MODE("1440x576i", DRM_MODE_TYPE_DRIVER, 27000, 1440, 1464,
1590, 1728, 0, 576, 580, 586, 625, 0,
/* 22 - 720(1440)x576i@50Hz */
{ DRM_MODE("720x576i", DRM_MODE_TYPE_DRIVER, 13500, 720, 732,
795, 864, 0, 576, 580, 586, 625, 0,
DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_NVSYNC |
DRM_MODE_FLAG_INTERLACE | DRM_MODE_FLAG_DBLCLK),
.vrefresh = 50, .picture_aspect_ratio = HDMI_PICTURE_ASPECT_16_9, },
/* 23 - 1440x288@50Hz */
{ DRM_MODE("1440x288", DRM_MODE_TYPE_DRIVER, 27000, 1440, 1464,
1590, 1728, 0, 288, 290, 293, 312, 0,
/* 23 - 720(1440)x288@50Hz */
{ DRM_MODE("720x288", DRM_MODE_TYPE_DRIVER, 13500, 720, 732,
795, 864, 0, 288, 290, 293, 312, 0,
DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_NVSYNC |
DRM_MODE_FLAG_DBLCLK),
.vrefresh = 50, .picture_aspect_ratio = HDMI_PICTURE_ASPECT_4_3, },
/* 24 - 1440x288@50Hz */
{ DRM_MODE("1440x288", DRM_MODE_TYPE_DRIVER, 27000, 1440, 1464,
1590, 1728, 0, 288, 290, 293, 312, 0,
/* 24 - 720(1440)x288@50Hz */
{ DRM_MODE("720x288", DRM_MODE_TYPE_DRIVER, 13500, 720, 732,
795, 864, 0, 288, 290, 293, 312, 0,
DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_NVSYNC |
DRM_MODE_FLAG_DBLCLK),
.vrefresh = 50, .picture_aspect_ratio = HDMI_PICTURE_ASPECT_16_9, },
@ -837,17 +838,17 @@ static const struct drm_display_mode edid_cea_modes[] = {
796, 864, 0, 576, 581, 586, 625, 0,
DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_NVSYNC),
.vrefresh = 100, .picture_aspect_ratio = HDMI_PICTURE_ASPECT_16_9, },
/* 44 - 1440x576i@100Hz */
{ DRM_MODE("1440x576", DRM_MODE_TYPE_DRIVER, 54000, 1440, 1464,
1590, 1728, 0, 576, 580, 586, 625, 0,
/* 44 - 720(1440)x576i@100Hz */
{ DRM_MODE("720x576i", DRM_MODE_TYPE_DRIVER, 27000, 720, 732,
795, 864, 0, 576, 580, 586, 625, 0,
DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_NVSYNC |
DRM_MODE_FLAG_DBLCLK),
DRM_MODE_FLAG_INTERLACE | DRM_MODE_FLAG_DBLCLK),
.vrefresh = 100, .picture_aspect_ratio = HDMI_PICTURE_ASPECT_4_3, },
/* 45 - 1440x576i@100Hz */
{ DRM_MODE("1440x576", DRM_MODE_TYPE_DRIVER, 54000, 1440, 1464,
1590, 1728, 0, 576, 580, 586, 625, 0,
/* 45 - 720(1440)x576i@100Hz */
{ DRM_MODE("720x576i", DRM_MODE_TYPE_DRIVER, 27000, 720, 732,
795, 864, 0, 576, 580, 586, 625, 0,
DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_NVSYNC |
DRM_MODE_FLAG_DBLCLK),
DRM_MODE_FLAG_INTERLACE | DRM_MODE_FLAG_DBLCLK),
.vrefresh = 100, .picture_aspect_ratio = HDMI_PICTURE_ASPECT_16_9, },
/* 46 - 1920x1080i@120Hz */
{ DRM_MODE("1920x1080i", DRM_MODE_TYPE_DRIVER, 148500, 1920, 2008,
@ -870,15 +871,15 @@ static const struct drm_display_mode edid_cea_modes[] = {
798, 858, 0, 480, 489, 495, 525, 0,
DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_NVSYNC),
.vrefresh = 120, .picture_aspect_ratio = HDMI_PICTURE_ASPECT_16_9, },
/* 50 - 1440x480i@120Hz */
{ DRM_MODE("1440x480i", DRM_MODE_TYPE_DRIVER, 54000, 1440, 1478,
1602, 1716, 0, 480, 488, 494, 525, 0,
/* 50 - 720(1440)x480i@120Hz */
{ DRM_MODE("720x480i", DRM_MODE_TYPE_DRIVER, 27000, 720, 739,
801, 858, 0, 480, 488, 494, 525, 0,
DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_NVSYNC |
DRM_MODE_FLAG_INTERLACE | DRM_MODE_FLAG_DBLCLK),
.vrefresh = 120, .picture_aspect_ratio = HDMI_PICTURE_ASPECT_4_3, },
/* 51 - 1440x480i@120Hz */
{ DRM_MODE("1440x480i", DRM_MODE_TYPE_DRIVER, 54000, 1440, 1478,
1602, 1716, 0, 480, 488, 494, 525, 0,
/* 51 - 720(1440)x480i@120Hz */
{ DRM_MODE("720x480i", DRM_MODE_TYPE_DRIVER, 27000, 720, 739,
801, 858, 0, 480, 488, 494, 525, 0,
DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_NVSYNC |
DRM_MODE_FLAG_INTERLACE | DRM_MODE_FLAG_DBLCLK),
.vrefresh = 120, .picture_aspect_ratio = HDMI_PICTURE_ASPECT_16_9, },
@ -892,15 +893,15 @@ static const struct drm_display_mode edid_cea_modes[] = {
796, 864, 0, 576, 581, 586, 625, 0,
DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_NVSYNC),
.vrefresh = 200, .picture_aspect_ratio = HDMI_PICTURE_ASPECT_16_9, },
/* 54 - 1440x576i@200Hz */
{ DRM_MODE("1440x576i", DRM_MODE_TYPE_DRIVER, 108000, 1440, 1464,
1590, 1728, 0, 576, 580, 586, 625, 0,
/* 54 - 720(1440)x576i@200Hz */
{ DRM_MODE("720x576i", DRM_MODE_TYPE_DRIVER, 54000, 720, 732,
795, 864, 0, 576, 580, 586, 625, 0,
DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_NVSYNC |
DRM_MODE_FLAG_INTERLACE | DRM_MODE_FLAG_DBLCLK),
.vrefresh = 200, .picture_aspect_ratio = HDMI_PICTURE_ASPECT_4_3, },
/* 55 - 1440x576i@200Hz */
{ DRM_MODE("1440x576i", DRM_MODE_TYPE_DRIVER, 108000, 1440, 1464,
1590, 1728, 0, 576, 580, 586, 625, 0,
/* 55 - 720(1440)x576i@200Hz */
{ DRM_MODE("720x576i", DRM_MODE_TYPE_DRIVER, 54000, 720, 732,
795, 864, 0, 576, 580, 586, 625, 0,
DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_NVSYNC |
DRM_MODE_FLAG_INTERLACE | DRM_MODE_FLAG_DBLCLK),
.vrefresh = 200, .picture_aspect_ratio = HDMI_PICTURE_ASPECT_16_9, },
@ -914,15 +915,15 @@ static const struct drm_display_mode edid_cea_modes[] = {
798, 858, 0, 480, 489, 495, 525, 0,
DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_NVSYNC),
.vrefresh = 240, .picture_aspect_ratio = HDMI_PICTURE_ASPECT_16_9, },
/* 58 - 1440x480i@240 */
{ DRM_MODE("1440x480i", DRM_MODE_TYPE_DRIVER, 108000, 1440, 1478,
1602, 1716, 0, 480, 488, 494, 525, 0,
/* 58 - 720(1440)x480i@240 */
{ DRM_MODE("720x480i", DRM_MODE_TYPE_DRIVER, 54000, 720, 739,
801, 858, 0, 480, 488, 494, 525, 0,
DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_NVSYNC |
DRM_MODE_FLAG_INTERLACE | DRM_MODE_FLAG_DBLCLK),
.vrefresh = 240, .picture_aspect_ratio = HDMI_PICTURE_ASPECT_4_3, },
/* 59 - 1440x480i@240 */
{ DRM_MODE("1440x480i", DRM_MODE_TYPE_DRIVER, 108000, 1440, 1478,
1602, 1716, 0, 480, 488, 494, 525, 0,
/* 59 - 720(1440)x480i@240 */
{ DRM_MODE("720x480i", DRM_MODE_TYPE_DRIVER, 54000, 720, 739,
801, 858, 0, 480, 488, 494, 525, 0,
DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_NVSYNC |
DRM_MODE_FLAG_INTERLACE | DRM_MODE_FLAG_DBLCLK),
.vrefresh = 240, .picture_aspect_ratio = HDMI_PICTURE_ASPECT_16_9, },
@ -1014,6 +1015,27 @@ module_param_named(edid_fixup, edid_fixup, int, 0400);
MODULE_PARM_DESC(edid_fixup,
"Minimum number of valid EDID header bytes (0-8, default 6)");
static void drm_get_displayid(struct drm_connector *connector,
struct edid *edid);
static int drm_edid_block_checksum(const u8 *raw_edid)
{
int i;
u8 csum = 0;
for (i = 0; i < EDID_LENGTH; i++)
csum += raw_edid[i];
return csum;
}
static bool drm_edid_is_zero(const u8 *in_edid, int length)
{
if (memchr_inv(in_edid, 0, length))
return false;
return true;
}
/**
* drm_edid_block_valid - Sanity check the EDID block (base or extension)
* @raw_edid: pointer to raw EDID block
@ -1027,8 +1049,7 @@ MODULE_PARM_DESC(edid_fixup,
*/
bool drm_edid_block_valid(u8 *raw_edid, int block, bool print_bad_edid)
{
int i;
u8 csum = 0;
u8 csum;
struct edid *edid = (struct edid *)raw_edid;
if (WARN_ON(!raw_edid))
@ -1048,8 +1069,7 @@ bool drm_edid_block_valid(u8 *raw_edid, int block, bool print_bad_edid)
}
}
for (i = 0; i < EDID_LENGTH; i++)
csum += raw_edid[i];
csum = drm_edid_block_checksum(raw_edid);
if (csum) {
if (print_bad_edid) {
DRM_ERROR("EDID checksum is invalid, remainder is %d\n", csum);
@ -1080,10 +1100,14 @@ bool drm_edid_block_valid(u8 *raw_edid, int block, bool print_bad_edid)
bad:
if (print_bad_edid) {
if (drm_edid_is_zero(raw_edid, EDID_LENGTH)) {
printk(KERN_ERR "EDID block is all zeroes\n");
} else {
printk(KERN_ERR "Raw EDID:\n");
print_hex_dump(KERN_ERR, " \t", DUMP_PREFIX_NONE, 16, 1,
raw_edid, EDID_LENGTH, false);
}
}
return false;
}
EXPORT_SYMBOL(drm_edid_block_valid);
@ -1115,7 +1139,7 @@ EXPORT_SYMBOL(drm_edid_is_valid);
#define DDC_SEGMENT_ADDR 0x30
/**
* drm_do_probe_ddc_edid() - get EDID information via I2C
* @adapter: I2C device adaptor
* @data: I2C device adapter
* @buf: EDID data buffer to be filled
* @block: 128 byte EDID block to start fetching from
* @len: EDID data buffer length to fetch
@ -1125,9 +1149,9 @@ EXPORT_SYMBOL(drm_edid_is_valid);
* Return: 0 on success or -1 on failure.
*/
static int
drm_do_probe_ddc_edid(struct i2c_adapter *adapter, unsigned char *buf,
int block, int len)
drm_do_probe_ddc_edid(void *data, u8 *buf, unsigned int block, size_t len)
{
struct i2c_adapter *adapter = data;
unsigned char start = block * EDID_LENGTH;
unsigned char segment = block >> 1;
unsigned char xfers = segment ? 3 : 2;
@ -1176,16 +1200,26 @@ drm_do_probe_ddc_edid(struct i2c_adapter *adapter, unsigned char *buf,
return ret == xfers ? 0 : -1;
}
static bool drm_edid_is_zero(u8 *in_edid, int length)
{
if (memchr_inv(in_edid, 0, length))
return false;
return true;
}
static u8 *
drm_do_get_edid(struct drm_connector *connector, struct i2c_adapter *adapter)
/**
* drm_do_get_edid - get EDID data using a custom EDID block read function
* @connector: connector we're probing
* @get_edid_block: EDID block read function
* @data: private data passed to the block read function
*
* When the I2C adapter connected to the DDC bus is hidden behind a device that
* exposes a different interface to read EDID blocks this function can be used
* to get EDID data using a custom block read function.
*
* As in the general case the DDC bus is accessible by the kernel at the I2C
* level, drivers must make all reasonable efforts to expose it as an I2C
* adapter and use drm_get_edid() instead of abusing this function.
*
* Return: Pointer to valid EDID or NULL if we couldn't find any.
*/
struct edid *drm_do_get_edid(struct drm_connector *connector,
int (*get_edid_block)(void *data, u8 *buf, unsigned int block,
size_t len),
void *data)
{
int i, j = 0, valid_extensions = 0;
u8 *block, *new;
@ -1196,7 +1230,7 @@ drm_do_get_edid(struct drm_connector *connector, struct i2c_adapter *adapter)
/* base block fetch */
for (i = 0; i < 4; i++) {
if (drm_do_probe_ddc_edid(adapter, block, 0, EDID_LENGTH))
if (get_edid_block(data, block, 0, EDID_LENGTH))
goto out;
if (drm_edid_block_valid(block, 0, print_bad_edid))
break;
@ -1210,7 +1244,7 @@ drm_do_get_edid(struct drm_connector *connector, struct i2c_adapter *adapter)
/* if there's no extensions, we're done */
if (block[0x7e] == 0)
return block;
return (struct edid *)block;
new = krealloc(block, (block[0x7e] + 1) * EDID_LENGTH, GFP_KERNEL);
if (!new)
@ -1219,7 +1253,7 @@ drm_do_get_edid(struct drm_connector *connector, struct i2c_adapter *adapter)
for (j = 1; j <= block[0x7e]; j++) {
for (i = 0; i < 4; i++) {
if (drm_do_probe_ddc_edid(adapter,
if (get_edid_block(data,
block + (valid_extensions + 1) * EDID_LENGTH,
j, EDID_LENGTH))
goto out;
@ -1247,7 +1281,7 @@ drm_do_get_edid(struct drm_connector *connector, struct i2c_adapter *adapter)
block = new;
}
return block;
return (struct edid *)block;
carp:
if (print_bad_edid) {
@ -1260,6 +1294,7 @@ out:
kfree(block);
return NULL;
}
EXPORT_SYMBOL_GPL(drm_do_get_edid);
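A hedged sketch of a caller whose EDID lives behind a device-private transport rather than a plain I2C adapter (the example_* names are illustrative):

static int example_read_edid_block(void *data, u8 *buf,
                                   unsigned int block, size_t len)
{
        struct example_device *edev = data;     /* hypothetical device */

        /* fetch (part of) one 128-byte EDID block over the private transport */
        if (example_bus_read(edev, block * EDID_LENGTH, buf, len) < 0)
                return -EIO;
        return 0;
}

struct edid *example_get_edid(struct drm_connector *connector,
                              struct example_device *edev)
{
        return drm_do_get_edid(connector, example_read_edid_block, edev);
}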
/**
* drm_probe_ddc() - probe DDC presence
@ -1289,11 +1324,14 @@ EXPORT_SYMBOL(drm_probe_ddc);
struct edid *drm_get_edid(struct drm_connector *connector,
struct i2c_adapter *adapter)
{
struct edid *edid = NULL;
struct edid *edid;
if (drm_probe_ddc(adapter))
edid = (struct edid *)drm_do_get_edid(connector, adapter);
if (!drm_probe_ddc(adapter))
return NULL;
edid = drm_do_get_edid(connector, drm_do_probe_ddc_edid, adapter);
if (edid)
drm_get_displayid(connector, edid);
return edid;
}
EXPORT_SYMBOL(drm_get_edid);
@ -2103,7 +2141,8 @@ static int
add_inferred_modes(struct drm_connector *connector, struct edid *edid)
{
struct detailed_mode_closure closure = {
connector, edid, 0, 0, 0
.connector = connector,
.edid = edid,
};
if (version_greater(edid, 1, 0))
@ -2169,7 +2208,8 @@ add_established_modes(struct drm_connector *connector, struct edid *edid)
((edid->established_timings.mfg_rsvd & 0x80) << 9);
int i, modes = 0;
struct detailed_mode_closure closure = {
connector, edid, 0, 0, 0
.connector = connector,
.edid = edid,
};
for (i = 0; i <= EDID_EST_TIMINGS; i++) {
@ -2227,7 +2267,8 @@ add_standard_modes(struct drm_connector *connector, struct edid *edid)
{
int i, modes = 0;
struct detailed_mode_closure closure = {
connector, edid, 0, 0, 0
.connector = connector,
.edid = edid,
};
for (i = 0; i < EDID_STD_TIMINGS; i++) {
@ -2313,7 +2354,8 @@ static int
add_cvt_modes(struct drm_connector *connector, struct edid *edid)
{
struct detailed_mode_closure closure = {
connector, edid, 0, 0, 0
.connector = connector,
.edid = edid,
};
if (version_greater(edid, 1, 2))
@ -2357,11 +2399,10 @@ add_detailed_modes(struct drm_connector *connector, struct edid *edid,
u32 quirks)
{
struct detailed_mode_closure closure = {
connector,
edid,
1,
quirks,
0
.connector = connector,
.edid = edid,
.preferred = 1,
.quirks = quirks,
};
if (closure.preferred && !version_greater(edid, 1, 3))
@ -2386,7 +2427,7 @@ add_detailed_modes(struct drm_connector *connector, struct edid *edid,
/*
* Search EDID for CEA extension block.
*/
static u8 *drm_find_cea_extension(struct edid *edid)
static u8 *drm_find_edid_extension(struct edid *edid, int ext_id)
{
u8 *edid_ext = NULL;
int i;
@ -2398,7 +2439,7 @@ static u8 *drm_find_cea_extension(struct edid *edid)
/* Find CEA extension */
for (i = 0; i < edid->extensions; i++) {
edid_ext = (u8 *)edid + EDID_LENGTH * (i + 1);
if (edid_ext[0] == CEA_EXT)
if (edid_ext[0] == ext_id)
break;
}
@ -2408,6 +2449,16 @@ static u8 *drm_find_cea_extension(struct edid *edid)
return edid_ext;
}
static u8 *drm_find_cea_extension(struct edid *edid)
{
return drm_find_edid_extension(edid, CEA_EXT);
}
static u8 *drm_find_displayid_extension(struct edid *edid)
{
return drm_find_edid_extension(edid, DISPLAYID_EXT);
}
/*
* Calculate the alternate clock for the CEA mode
* (60Hz vs. 59.94Hz etc.)
@ -2737,12 +2788,12 @@ static int add_3d_struct_modes(struct drm_connector *connector, u16 structure,
newmode = drm_display_mode_from_vic_index(connector, video_db,
video_len,
video_index);
if (newmode) {
if (newmode) {
newmode->flags |= DRM_MODE_FLAG_3D_FRAME_PACKING;
drm_mode_probed_add(connector, newmode);
modes++;
}
drm_mode_probed_add(connector, newmode);
modes++;
}
}
if (structure & (1 << 6)) {
newmode = drm_display_mode_from_vic_index(connector, video_db,
video_len,
@ -2986,7 +3037,7 @@ add_cea_modes(struct drm_connector *connector, struct edid *edid)
hdmi = db;
hdmi_len = dbl;
}
}
}
}
/*
@ -3125,9 +3176,12 @@ void drm_edid_to_eld(struct drm_connector *connector, struct edid *edid)
}
}
eld[5] |= sad_count << 4;
eld[2] = (20 + mnl + sad_count * 3 + 3) / 4;
DRM_DEBUG_KMS("ELD size %d, SAD count %d\n", (int)eld[2], sad_count);
eld[DRM_ELD_BASELINE_ELD_LEN] =
DIV_ROUND_UP(drm_eld_calc_baseline_block_size(eld), 4);
DRM_DEBUG_KMS("ELD size %d, SAD count %d\n",
drm_eld_size(eld), sad_count);
}
EXPORT_SYMBOL(drm_edid_to_eld);
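The replaced open-coded expression and the new helper compute the same kind of quantity in different ways: the stored byte is the baseline-block length in 4-byte units, and the helper no longer counts the 4-byte ELD header into it. A worked check, assuming the upstream definition of drm_eld_calc_baseline_block_size() (20 - 4 header bytes + mnl + 3 bytes per SAD):

/* Worked example with mnl = 8 and sad_count = 1:
 *
 *   old: eld[2] = (20 + 8 + 3 * 1 + 3) / 4                  = 8 dwords
 *   new: baseline bytes = (20 - 4) + 8 + 3 * 1              = 27
 *        eld[DRM_ELD_BASELINE_ELD_LEN] = DIV_ROUND_UP(27, 4) = 7 dwords
 *        drm_eld_size() = 4-byte header + 7 * 4             = 32 bytes
 *
 * i.e. under that assumption the helper also fixes an over-count of one
 * dword in the old formula.
 */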
@ -3433,10 +3487,10 @@ EXPORT_SYMBOL(drm_rgb_quant_range_selectable);
/**
* drm_assign_hdmi_deep_color_info - detect whether monitor supports
* hdmi deep color modes and update drm_display_info if so.
*
* @edid: monitor EDID information
* @info: Updated with maximum supported deep color bpc and color format
* if deep color supported.
* @connector: DRM connector, used only for debug output
*
* Parse the CEA extension according to CEA-861-B.
* Return true if HDMI deep color supported, false if not or unknown.
@ -3865,3 +3919,123 @@ drm_hdmi_vendor_infoframe_from_display_mode(struct hdmi_vendor_infoframe *frame,
return 0;
}
EXPORT_SYMBOL(drm_hdmi_vendor_infoframe_from_display_mode);
static int drm_parse_display_id(struct drm_connector *connector,
u8 *displayid, int length,
bool is_edid_extension)
{
/* if this is an EDID extension the first byte will be 0x70 */
int idx = 0;
struct displayid_hdr *base;
struct displayid_block *block;
u8 csum = 0;
int i;
if (is_edid_extension)
idx = 1;
base = (struct displayid_hdr *)&displayid[idx];
DRM_DEBUG_KMS("base revision 0x%x, length %d, %d %d\n",
base->rev, base->bytes, base->prod_id, base->ext_count);
if (base->bytes + 5 > length - idx)
return -EINVAL;
for (i = idx; i <= base->bytes + 5; i++) {
csum += displayid[i];
}
if (csum) {
DRM_ERROR("DisplayID checksum invalid, remainder is %d\n", csum);
return -EINVAL;
}
block = (struct displayid_block *)&displayid[idx + 4];
DRM_DEBUG_KMS("block id %d, rev %d, len %d\n",
block->tag, block->rev, block->num_bytes);
switch (block->tag) {
case DATA_BLOCK_TILED_DISPLAY: {
struct displayid_tiled_block *tile = (struct displayid_tiled_block *)block;
u16 w, h;
u8 tile_v_loc, tile_h_loc;
u8 num_v_tile, num_h_tile;
struct drm_tile_group *tg;
w = tile->tile_size[0] | tile->tile_size[1] << 8;
h = tile->tile_size[2] | tile->tile_size[3] << 8;
num_v_tile = (tile->topo[0] & 0xf) | (tile->topo[2] & 0x30);
num_h_tile = (tile->topo[0] >> 4) | ((tile->topo[2] >> 2) & 0x30);
tile_v_loc = (tile->topo[1] & 0xf) | ((tile->topo[2] & 0x3) << 4);
tile_h_loc = (tile->topo[1] >> 4) | (((tile->topo[2] >> 2) & 0x3) << 4);
connector->has_tile = true;
if (tile->tile_cap & 0x80)
connector->tile_is_single_monitor = true;
connector->num_h_tile = num_h_tile + 1;
connector->num_v_tile = num_v_tile + 1;
connector->tile_h_loc = tile_h_loc;
connector->tile_v_loc = tile_v_loc;
connector->tile_h_size = w + 1;
connector->tile_v_size = h + 1;
DRM_DEBUG_KMS("tile cap 0x%x\n", tile->tile_cap);
DRM_DEBUG_KMS("tile_size %d x %d\n", w + 1, h + 1);
DRM_DEBUG_KMS("topo num tiles %dx%d, location %dx%d\n",
num_h_tile + 1, num_v_tile + 1, tile_h_loc, tile_v_loc);
DRM_DEBUG_KMS("vend %c%c%c\n", tile->topology_id[0], tile->topology_id[1], tile->topology_id[2]);
tg = drm_mode_get_tile_group(connector->dev, tile->topology_id);
if (!tg) {
tg = drm_mode_create_tile_group(connector->dev, tile->topology_id);
}
if (!tg)
return -ENOMEM;
if (connector->tile_group != tg) {
/* if this is a new tile group, keep the reference we
 * just took and drop the reference to the old tile group */
if (connector->tile_group) {
drm_mode_put_tile_group(connector->dev, connector->tile_group);
}
connector->tile_group = tg;
} else
/* if same tile group, then release the ref we just took. */
drm_mode_put_tile_group(connector->dev, tg);
}
break;
default:
printk("unknown displayid tag %d\n", block->tag);
break;
}
return 0;
}
static void drm_get_displayid(struct drm_connector *connector,
struct edid *edid)
{
void *displayid = NULL;
int ret;
connector->has_tile = false;
displayid = drm_find_displayid_extension(edid);
if (!displayid) {
/* drop reference to any tile group we had */
goto out_drop_ref;
}
ret = drm_parse_display_id(connector, displayid, EDID_LENGTH, true);
if (ret < 0)
goto out_drop_ref;
if (!connector->has_tile)
goto out_drop_ref;
return;
out_drop_ref:
if (connector->tile_group) {
drm_mode_put_tile_group(connector->dev, connector->tile_group);
connector->tile_group = NULL;
}
return;
}
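The tiled-display block stores each dimension minus one, little-endian, and splits the tile counts and locations between the nibbles of topo[0]/topo[1] and the high bits in topo[2]. A worked decode for the left tile of a hypothetical 2x1 group of 1920x2160 panels (payload bytes are illustrative):

/*   tile_size[] = { 0x7f, 0x07, 0x6f, 0x08 }
 *       w = 0x077f = 1919  ->  tile_h_size = w + 1 = 1920
 *       h = 0x086f = 2159  ->  tile_v_size = h + 1 = 2160
 *
 *   topo[] = { 0x10, 0x00, 0x00 }
 *       num_h_tile = (0x10 >> 4) = 1  ->  num_h_tile + 1 = 2 across
 *       num_v_tile = (0x10 & 0xf) = 0 ->  num_v_tile + 1 = 1 down
 *       tile_h_loc = tile_v_loc = 0   ->  this is the left tile
 */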

View File

@ -126,7 +126,7 @@ int drm_fb_helper_add_one_connector(struct drm_fb_helper *fb_helper, struct drm_
WARN_ON(!mutex_is_locked(&fb_helper->dev->mode_config.mutex));
if (fb_helper->connector_count + 1 > fb_helper->connector_info_alloc_count) {
temp = krealloc(fb_helper->connector_info, sizeof(struct drm_fb_helper_connector) * (fb_helper->connector_count + 1), GFP_KERNEL);
temp = krealloc(fb_helper->connector_info, sizeof(struct drm_fb_helper_connector *) * (fb_helper->connector_count + 1), GFP_KERNEL);
if (!temp)
return -ENOMEM;
@ -170,6 +170,7 @@ int drm_fb_helper_remove_one_connector(struct drm_fb_helper *fb_helper,
return 0;
}
EXPORT_SYMBOL(drm_fb_helper_remove_one_connector);
static void drm_fb_helper_save_lut_atomic(struct drm_crtc *crtc, struct drm_fb_helper *helper)
{
uint16_t *r_base, *g_base, *b_base;
@ -210,10 +211,17 @@ static bool restore_fbdev_mode(struct drm_fb_helper *fb_helper)
drm_warn_on_modeset_not_all_locked(dev);
list_for_each_entry(plane, &dev->mode_config.plane_list, head)
list_for_each_entry(plane, &dev->mode_config.plane_list, head) {
if (plane->type != DRM_PLANE_TYPE_PRIMARY)
drm_plane_force_disable(plane);
if (dev->mode_config.rotation_property) {
drm_mode_plane_set_obj_prop(plane,
dev->mode_config.rotation_property,
BIT(DRM_ROTATE_0));
}
}
for (i = 0; i < fb_helper->crtc_count; i++) {
struct drm_mode_set *mode_set = &fb_helper->crtc_info[i].mode_set;
struct drm_crtc *crtc = mode_set->crtc;
@ -243,6 +251,8 @@ bool drm_fb_helper_restore_fbdev_mode_unlocked(struct drm_fb_helper *fb_helper)
{
struct drm_device *dev = fb_helper->dev;
bool ret;
bool do_delayed = false;
drm_modeset_lock_all(dev);
ret = restore_fbdev_mode(fb_helper);
drm_modeset_unlock_all(dev);
@ -706,10 +716,6 @@ int drm_fb_helper_set_par(struct fb_info *info)
drm_fb_helper_restore_fbdev_mode_unlocked(fb_helper);
if (fb_helper->delayed_hotplug) {
fb_helper->delayed_hotplug = false;
drm_fb_helper_hotplug_event(fb_helper);
}
return 0;
}
EXPORT_SYMBOL(drm_fb_helper_set_par);
@ -784,7 +790,7 @@ static int drm_fb_helper_single_fb_probe(struct drm_fb_helper *fb_helper,
struct drm_fb_helper_connector *fb_helper_conn = fb_helper->connector_info[i];
struct drm_cmdline_mode *cmdline_mode;
cmdline_mode = &fb_helper_conn->cmdline_mode;
cmdline_mode = &fb_helper_conn->connector->cmdline_mode;
if (cmdline_mode->bpp_specified) {
switch (cmdline_mode->bpp) {
@ -813,19 +819,21 @@ static int drm_fb_helper_single_fb_probe(struct drm_fb_helper *fb_helper,
crtc_count = 0;
for (i = 0; i < fb_helper->crtc_count; i++) {
struct drm_display_mode *desired_mode;
int x, y;
desired_mode = fb_helper->crtc_info[i].desired_mode;
x = fb_helper->crtc_info[i].x;
y = fb_helper->crtc_info[i].y;
if (desired_mode) {
if (gamma_size == 0)
gamma_size = fb_helper->crtc_info[i].mode_set.crtc->gamma_size;
if (desired_mode->hdisplay < sizes.fb_width)
sizes.fb_width = desired_mode->hdisplay;
if (desired_mode->vdisplay < sizes.fb_height)
sizes.fb_height = desired_mode->vdisplay;
if (desired_mode->hdisplay > sizes.surface_width)
sizes.surface_width = desired_mode->hdisplay;
if (desired_mode->vdisplay > sizes.surface_height)
sizes.surface_height = desired_mode->vdisplay;
if (desired_mode->hdisplay + x < sizes.fb_width)
sizes.fb_width = desired_mode->hdisplay + x;
if (desired_mode->vdisplay + y < sizes.fb_height)
sizes.fb_height = desired_mode->vdisplay + y;
if (desired_mode->hdisplay + x > sizes.surface_width)
sizes.surface_width = desired_mode->hdisplay + x;
if (desired_mode->vdisplay + y > sizes.surface_height)
sizes.surface_height = desired_mode->vdisplay + y;
crtc_count++;
}
}
@ -858,6 +866,8 @@ static int drm_fb_helper_single_fb_probe(struct drm_fb_helper *fb_helper,
info->var.pixclock = 0;
dev_info(fb_helper->dev->dev, "fb%d: %s frame buffer device\n",
info->node, info->fix.id);
list_add(&fb_helper->kernel_fb_list, &kernel_fb_helper_list);
@ -1017,9 +1027,7 @@ EXPORT_SYMBOL(drm_has_preferred_mode);
static bool drm_has_cmdline_mode(struct drm_fb_helper_connector *fb_connector)
{
struct drm_cmdline_mode *cmdline_mode;
cmdline_mode = &fb_connector->cmdline_mode;
return cmdline_mode->specified;
return fb_connector->connector->cmdline_mode.specified;
}
struct drm_display_mode *drm_pick_cmdline_mode(struct drm_fb_helper_connector *fb_helper_conn,
@ -1031,7 +1039,6 @@ struct drm_display_mode *drm_pick_cmdline_mode(struct drm_fb_helper_connector *f
return NULL;
cmdline_mode = &fb_helper_conn->cmdline_mode;
if (cmdline_mode->specified == false)
return mode;
@ -1115,6 +1122,7 @@ static void drm_enable_connectors(struct drm_fb_helper *fb_helper,
static bool drm_target_cloned(struct drm_fb_helper *fb_helper,
struct drm_display_mode **modes,
struct drm_fb_offset *offsets,
bool *enabled, int width, int height)
{
int count, i, j;
@ -1186,27 +1194,88 @@ static bool drm_target_cloned(struct drm_fb_helper *fb_helper,
return false;
}
static int drm_get_tile_offsets(struct drm_fb_helper *fb_helper,
struct drm_display_mode **modes,
struct drm_fb_offset *offsets,
int idx,
int h_idx, int v_idx)
{
struct drm_fb_helper_connector *fb_helper_conn;
int i;
int hoffset = 0, voffset = 0;
for (i = 0; i < fb_helper->connector_count; i++) {
fb_helper_conn = fb_helper->connector_info[i];
if (!fb_helper_conn->connector->has_tile)
continue;
if (!modes[i] && (h_idx || v_idx)) {
DRM_DEBUG_KMS("no modes for connector tiled %d %d\n", i,
fb_helper_conn->connector->base.id);
continue;
}
if (fb_helper_conn->connector->tile_h_loc < h_idx)
hoffset += modes[i]->hdisplay;
if (fb_helper_conn->connector->tile_v_loc < v_idx)
voffset += modes[i]->vdisplay;
}
offsets[idx].x = hoffset;
offsets[idx].y = voffset;
DRM_DEBUG_KMS("returned %d %d for %d %d\n", hoffset, voffset, h_idx, v_idx);
return 0;
}
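The offset for a tile is simply the sum of the hdisplay of every tiled connector left of it, and of the vdisplay of every tiled connector above it. For a hypothetical 2x1 group of 1920x2160 tiles:

/*   left tile  (tile_h_loc 0): hoffset = 0     voffset = 0
 *   right tile (tile_h_loc 1): hoffset = 1920  voffset = 0
 *
 * so the fbdev framebuffer spans 3840x2160 and the right tile's
 * mode_set scans out starting at x = 1920.
 */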
static bool drm_target_preferred(struct drm_fb_helper *fb_helper,
struct drm_display_mode **modes,
struct drm_fb_offset *offsets,
bool *enabled, int width, int height)
{
struct drm_fb_helper_connector *fb_helper_conn;
int i;
uint64_t conn_configured = 0, mask;
int tile_pass = 0;
mask = (1 << fb_helper->connector_count) - 1;
retry:
for (i = 0; i < fb_helper->connector_count; i++) {
fb_helper_conn = fb_helper->connector_info[i];
if (enabled[i] == false)
if (conn_configured & (1 << i))
continue;
if (enabled[i] == false) {
conn_configured |= (1 << i);
continue;
}
/* first pass over all the untiled connectors */
if (tile_pass == 0 && fb_helper_conn->connector->has_tile)
continue;
if (tile_pass == 1) {
if (fb_helper_conn->connector->tile_h_loc != 0 ||
fb_helper_conn->connector->tile_v_loc != 0)
continue;
} else {
if (fb_helper_conn->connector->tile_h_loc != tile_pass - 1 &&
fb_helper_conn->connector->tile_v_loc != tile_pass - 1)
/* if this tile_pass doesn't cover any of the tiles - keep going */
continue;
/* find the tile offsets for this pass - need
to find all tiles left and above */
drm_get_tile_offsets(fb_helper, modes, offsets,
i, fb_helper_conn->connector->tile_h_loc, fb_helper_conn->connector->tile_v_loc);
}
DRM_DEBUG_KMS("looking for cmdline mode on connector %d\n",
fb_helper_conn->connector->base.id);
/* go for the command line mode first */
modes[i] = drm_pick_cmdline_mode(fb_helper_conn, width, height);
if (!modes[i]) {
DRM_DEBUG_KMS("looking for preferred mode on connector %d\n",
fb_helper_conn->connector->base.id);
DRM_DEBUG_KMS("looking for preferred mode on connector %d %d\n",
fb_helper_conn->connector->base.id, fb_helper_conn->connector->tile_group ? fb_helper_conn->connector->tile_group->id : 0);
modes[i] = drm_has_preferred_mode(fb_helper_conn, width, height);
}
/* No preferred modes, pick one off the list */
@ -1216,6 +1285,12 @@ static bool drm_target_preferred(struct drm_fb_helper *fb_helper,
}
DRM_DEBUG_KMS("found mode %s\n", modes[i] ? modes[i]->name :
"none");
conn_configured |= (1 << i);
}
if ((conn_configured & mask) != mask) {
tile_pass++;
goto retry;
}
return true;
}
@ -1305,6 +1380,7 @@ static void drm_setup_crtcs(struct drm_fb_helper *fb_helper)
struct drm_device *dev = fb_helper->dev;
struct drm_fb_helper_crtc **crtcs;
struct drm_display_mode **modes;
struct drm_fb_offset *offsets;
struct drm_mode_set *modeset;
bool *enabled;
int width, height;
@ -1319,9 +1395,11 @@ static void drm_setup_crtcs(struct drm_fb_helper *fb_helper)
sizeof(struct drm_fb_helper_crtc *), GFP_KERNEL);
modes = kcalloc(dev->mode_config.num_connector,
sizeof(struct drm_display_mode *), GFP_KERNEL);
offsets = kcalloc(dev->mode_config.num_connector,
sizeof(struct drm_fb_offset), GFP_KERNEL);
enabled = kcalloc(dev->mode_config.num_connector,
sizeof(bool), GFP_KERNEL);
if (!crtcs || !modes || !enabled) {
if (!crtcs || !modes || !enabled || !offsets) {
DRM_ERROR("Memory allocation failed\n");
goto out;
}
@ -1331,14 +1409,16 @@ static void drm_setup_crtcs(struct drm_fb_helper *fb_helper)
if (!(fb_helper->funcs->initial_config &&
fb_helper->funcs->initial_config(fb_helper, crtcs, modes,
offsets,
enabled, width, height))) {
memset(modes, 0, dev->mode_config.num_connector*sizeof(modes[0]));
memset(crtcs, 0, dev->mode_config.num_connector*sizeof(crtcs[0]));
memset(offsets, 0, dev->mode_config.num_connector*sizeof(offsets[0]));
if (!drm_target_cloned(fb_helper,
modes, enabled, width, height) &&
!drm_target_preferred(fb_helper,
modes, enabled, width, height))
if (!drm_target_cloned(fb_helper, modes, offsets,
enabled, width, height) &&
!drm_target_preferred(fb_helper, modes, offsets,
enabled, width, height))
DRM_ERROR("Unable to find initial modes\n");
DRM_DEBUG_KMS("picking CRTCs for %dx%d config\n",
@ -1358,18 +1438,23 @@ static void drm_setup_crtcs(struct drm_fb_helper *fb_helper)
for (i = 0; i < fb_helper->connector_count; i++) {
struct drm_display_mode *mode = modes[i];
struct drm_fb_helper_crtc *fb_crtc = crtcs[i];
struct drm_fb_offset *offset = &offsets[i];
modeset = &fb_crtc->mode_set;
if (mode && fb_crtc) {
DRM_DEBUG_KMS("desired mode %s set on crtc %d\n",
mode->name, fb_crtc->mode_set.crtc->base.id);
DRM_DEBUG_KMS("desired mode %s set on crtc %d (%d,%d)\n",
mode->name, fb_crtc->mode_set.crtc->base.id, offset->x, offset->y);
fb_crtc->desired_mode = mode;
fb_crtc->x = offset->x;
fb_crtc->y = offset->y;
if (modeset->mode)
drm_mode_destroy(dev, modeset->mode);
modeset->mode = drm_mode_duplicate(dev,
fb_crtc->desired_mode);
modeset->connectors[modeset->num_connectors++] = fb_helper->connector_info[i]->connector;
modeset->fb = fb_helper->fb;
modeset->x = offset->x;
modeset->y = offset->y;
}
}
@ -1378,7 +1463,6 @@ static void drm_setup_crtcs(struct drm_fb_helper *fb_helper)
modeset = &fb_helper->crtc_info[i].mode_set;
if (modeset->num_connectors == 0) {
BUG_ON(modeset->fb);
BUG_ON(modeset->num_connectors);
if (modeset->mode)
drm_mode_destroy(dev, modeset->mode);
modeset->mode = NULL;
@ -1387,6 +1471,7 @@ static void drm_setup_crtcs(struct drm_fb_helper *fb_helper)
out:
kfree(crtcs);
kfree(modes);
kfree(offsets);
kfree(enabled);
}
@ -1416,8 +1501,6 @@ bool drm_fb_helper_initial_config(struct drm_fb_helper *fb_helper, int bpp_sel)
struct drm_device *dev = fb_helper->dev;
int count = 0;
// drm_fb_helper_parse_command_line(fb_helper);
mutex_lock(&dev->mode_config.mutex);
count = drm_fb_helper_probe_connector_modes(fb_helper,
dev->mode_config.max_width,

View File

@ -35,6 +35,8 @@
#include <linux/err.h>
#include <drm/drmP.h>
#include <drm/drm_vma_manager.h>
#include <drm/drm_gem.h>
#include "drm_internal.h"
/** @file drm_gem.c
*
@ -143,7 +145,7 @@ int drm_gem_object_init(struct drm_device *dev,
EXPORT_SYMBOL(drm_gem_object_init);
/**
* drm_gem_object_init - initialize an allocated private GEM object
* drm_gem_private_object_init - initialize an allocated private GEM object
* @dev: drm_device the object should be initialized for
* @obj: drm_gem_object to initialize
* @size: object size
@ -168,7 +170,7 @@ void drm_gem_private_object_init(struct drm_device *dev,
EXPORT_SYMBOL(drm_gem_private_object_init);
/**
* drm_gem_object_free - release resources bound to userspace handles
* drm_gem_object_handle_free - release resources bound to userspace handles
* @obj: GEM object to clean up.
*
* Called after the last handle to the object has been closed
@ -278,7 +280,7 @@ EXPORT_SYMBOL(drm_gem_dumb_destroy);
* drm_gem_handle_create_tail - internal functions to create a handle
* @file_priv: drm file-private structure to register the handle for
* @obj: object to register
* @handlep: pionter to return the created handle to the caller
* @handlep: pointer to return the created handle to the caller
*
* This expects the dev->object_name_lock to be held already and will drop it
* before returning. Used to avoid races in establishing new handles when
@ -331,7 +333,7 @@ drm_gem_handle_create_tail(struct drm_file *file_priv,
}
/**
* gem_handle_create - create a gem handle for an object
* drm_gem_handle_create - create a gem handle for an object
* @file_priv: drm file-private structure to register the handle for
* @obj: object to register
* @handlep: pointer to return the created handle to the caller
@ -340,8 +342,7 @@ drm_gem_handle_create_tail(struct drm_file *file_priv,
* to the object, which includes a regular reference count. Callers
* will likely want to dereference the object afterwards.
*/
int
drm_gem_handle_create(struct drm_file *file_priv,
int drm_gem_handle_create(struct drm_file *file_priv,
struct drm_gem_object *obj,
u32 *handlep)
{
@ -551,7 +552,7 @@ drm_gem_close_ioctl(struct drm_device *dev, void *data,
struct drm_gem_close *args = data;
int ret;
if (!(dev->driver->driver_features & DRIVER_GEM))
if (!drm_core_check_feature(dev, DRIVER_GEM))
return -ENODEV;
ret = drm_gem_handle_delete(file_priv, args->handle);
@ -578,7 +579,7 @@ drm_gem_flink_ioctl(struct drm_device *dev, void *data,
struct drm_gem_object *obj;
int ret;
if (!(dev->driver->driver_features & DRIVER_GEM))
if (!drm_core_check_feature(dev, DRIVER_GEM))
return -ENODEV;
obj = drm_gem_object_lookup(dev, file_priv, args->handle);
@ -631,7 +632,7 @@ drm_gem_open_ioctl(struct drm_device *dev, void *data,
int ret;
u32 handle;
if (!(dev->driver->driver_features & DRIVER_GEM))
if (!drm_core_check_feature(dev, DRIVER_GEM))
return -ENODEV;
mutex_lock(&dev->object_name_lock);

View File

@ -0,0 +1,132 @@
/*
* Copyright © 2014 Intel Corporation
* Daniel Vetter <daniel.vetter@ffwll.ch>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
/* drm_irq.c */
extern unsigned int drm_timestamp_monotonic;
/* drm_fops.c */
extern struct mutex drm_global_mutex;
int drm_lastclose(struct drm_device *dev);
/* drm_pci.c */
int drm_pci_set_unique(struct drm_device *dev,
struct drm_master *master,
struct drm_unique *u);
int drm_irq_by_busid(struct drm_device *dev, void *data,
struct drm_file *file_priv);
/* drm_vm.c */
int drm_vma_info(struct seq_file *m, void *data);
void drm_vm_open_locked(struct drm_device *dev, struct vm_area_struct *vma);
void drm_vm_close_locked(struct drm_device *dev, struct vm_area_struct *vma);
/* drm_prime.c */
int drm_prime_handle_to_fd_ioctl(struct drm_device *dev, void *data,
struct drm_file *file_priv);
int drm_prime_fd_to_handle_ioctl(struct drm_device *dev, void *data,
struct drm_file *file_priv);
void drm_prime_init_file_private(struct drm_prime_file_private *prime_fpriv);
void drm_prime_destroy_file_private(struct drm_prime_file_private *prime_fpriv);
void drm_prime_remove_buf_handle_locked(struct drm_prime_file_private *prime_fpriv,
struct dma_buf *dma_buf);
/* drm_info.c */
int drm_name_info(struct seq_file *m, void *data);
int drm_vm_info(struct seq_file *m, void *data);
int drm_bufs_info(struct seq_file *m, void *data);
int drm_vblank_info(struct seq_file *m, void *data);
int drm_clients_info(struct seq_file *m, void* data);
int drm_gem_name_info(struct seq_file *m, void *data);
/* drm_irq.c */
int drm_control(struct drm_device *dev, void *data,
struct drm_file *file_priv);
int drm_modeset_ctl(struct drm_device *dev, void *data,
struct drm_file *file_priv);
/* drm_auth.c */
int drm_getmagic(struct drm_device *dev, void *data,
struct drm_file *file_priv);
int drm_authmagic(struct drm_device *dev, void *data,
struct drm_file *file_priv);
int drm_remove_magic(struct drm_master *master, drm_magic_t magic);
/* drm_sysfs.c */
extern struct class *drm_class;
struct class *drm_sysfs_create(struct module *owner, char *name);
void drm_sysfs_destroy(void);
struct device *drm_sysfs_minor_alloc(struct drm_minor *minor);
int drm_sysfs_connector_add(struct drm_connector *connector);
void drm_sysfs_connector_remove(struct drm_connector *connector);
/* drm_gem.c */
int drm_gem_init(struct drm_device *dev);
void drm_gem_destroy(struct drm_device *dev);
int drm_gem_handle_create_tail(struct drm_file *file_priv,
struct drm_gem_object *obj,
u32 *handlep);
int drm_gem_close_ioctl(struct drm_device *dev, void *data,
struct drm_file *file_priv);
int drm_gem_flink_ioctl(struct drm_device *dev, void *data,
struct drm_file *file_priv);
int drm_gem_open_ioctl(struct drm_device *dev, void *data,
struct drm_file *file_priv);
void drm_gem_open(struct drm_device *dev, struct drm_file *file_private);
void drm_gem_release(struct drm_device *dev, struct drm_file *file_private);
/* drm_drv.c */
int drm_setmaster_ioctl(struct drm_device *dev, void *data,
struct drm_file *file_priv);
int drm_dropmaster_ioctl(struct drm_device *dev, void *data,
struct drm_file *file_priv);
struct drm_master *drm_master_create(struct drm_minor *minor);
/* drm_debugfs.c */
#if defined(CONFIG_DEBUG_FS)
int drm_debugfs_init(struct drm_minor *minor, int minor_id,
struct dentry *root);
int drm_debugfs_cleanup(struct drm_minor *minor);
int drm_debugfs_connector_add(struct drm_connector *connector);
void drm_debugfs_connector_remove(struct drm_connector *connector);
#else
static inline int drm_debugfs_init(struct drm_minor *minor, int minor_id,
struct dentry *root)
{
return 0;
}
static inline int drm_debugfs_cleanup(struct drm_minor *minor)
{
return 0;
}
static inline int drm_debugfs_connector_add(struct drm_connector *connector)
{
return 0;
}
static inline void drm_debugfs_connector_remove(struct drm_connector *connector)
{
}
#endif

View File

@ -33,13 +33,13 @@
*/
#include <drm/drmP.h>
#include <asm/div64.h>
//#include "drm_trace.h"
#include "drm_internal.h"
//#include <linux/interrupt.h> /* For task queue support */
#include <linux/slab.h>
//#include <linux/vgaarb.h>
#include <linux/vgaarb.h>
#include <linux/export.h>
/* Access macro for slots in vblank timestamp ringbuffer. */
@ -56,6 +56,12 @@
*/
#define DRM_REDUNDANT_VBLIRQ_THRESH_NS 1000000
static bool
drm_get_last_vbltimestamp(struct drm_device *dev, int crtc,
struct timeval *tvblank, unsigned flags);
static unsigned int drm_timestamp_precision = 20; /* Default to 20 usecs. */
/*
* Clear vblank timestamp buffer for a crtc.
*/
@ -86,11 +92,13 @@ int drm_vblank_init(struct drm_device *dev, int num_crtcs)
goto err;
for (i = 0; i < num_crtcs; i++) {
dev->vblank[i].dev = dev;
dev->vblank[i].crtc = i;
init_waitqueue_head(&dev->vblank[i].queue);
setup_timer(&dev->vblank[i].disable_timer, vblank_disable_fn,
(unsigned long)&dev->vblank[i]);
struct drm_vblank_crtc *vblank = &dev->vblank[i];
vblank->dev = dev;
vblank->crtc = i;
init_waitqueue_head(&vblank->queue);
setup_timer(&vblank->disable_timer, vblank_disable_fn,
(unsigned long)vblank);
}
DRM_INFO("Supports vblank timestamp caching Rev 2 (21.10.2013).\n");
@ -106,7 +114,7 @@ int drm_vblank_init(struct drm_device *dev, int num_crtcs)
return 0;
err:
drm_vblank_cleanup(dev);
dev->num_crtcs = 0;
return ret;
}
EXPORT_SYMBOL(drm_vblank_init);
@ -178,7 +186,7 @@ int drm_irq_install(struct drm_device *dev, int irq)
dev->irq = irq;
}
u16_t cmd = PciRead16(dev->pdev->busnr, dev->pdev->devfn, 4);
u16 cmd = PciRead16(dev->pdev->busnr, dev->pdev->devfn, 4);
cmd&= ~(1<<10);
PciWrite16(dev->pdev->busnr, dev->pdev->devfn, 4, cmd);
@ -344,6 +352,159 @@ int drm_calc_vbltimestamp_from_scanoutpos(struct drm_device *dev, int crtc,
}
EXPORT_SYMBOL(drm_calc_vbltimestamp_from_scanoutpos);
/**
* drm_vblank_get - get a reference count on vblank events
* @dev: DRM device
* @crtc: which CRTC to own
*
* Acquire a reference count on vblank events to avoid having them disabled
* while in use.
*
* This is the legacy version of drm_crtc_vblank_get().
*
* Returns:
* Zero on success, nonzero on failure.
*/
int drm_vblank_get(struct drm_device *dev, int crtc)
{
struct drm_vblank_crtc *vblank = &dev->vblank[crtc];
unsigned long irqflags;
int ret = 0;
#if 0
if (WARN_ON(crtc >= dev->num_crtcs))
return -EINVAL;
spin_lock_irqsave(&dev->vbl_lock, irqflags);
/* Going from 0->1 means we have to enable interrupts again */
if (atomic_add_return(1, &vblank->refcount) == 1) {
ret = drm_vblank_enable(dev, crtc);
} else {
if (!vblank->enabled) {
atomic_dec(&vblank->refcount);
ret = -EINVAL;
}
}
spin_unlock_irqrestore(&dev->vbl_lock, irqflags);
#endif
return ret;
}
EXPORT_SYMBOL(drm_vblank_get);
/**
* drm_crtc_vblank_get - get a reference count on vblank events
* @crtc: which CRTC to own
*
* Acquire a reference count on vblank events to avoid having them disabled
* while in use.
*
* This is the native kms version of drm_vblank_get().
*
* Returns:
* Zero on success, nonzero on failure.
*/
int drm_crtc_vblank_get(struct drm_crtc *crtc)
{
return drm_vblank_get(crtc->dev, drm_crtc_index(crtc));
}
EXPORT_SYMBOL(drm_crtc_vblank_get);
/**
* drm_vblank_put - give up ownership of vblank events
* @dev: DRM device
* @crtc: which counter to give up
*
* Release ownership of a given vblank counter, turning off interrupts
* if possible. Disable interrupts after drm_vblank_offdelay milliseconds.
*
* This is the legacy version of drm_crtc_vblank_put().
*/
void drm_vblank_put(struct drm_device *dev, int crtc)
{
#if 0
struct drm_vblank_crtc *vblank = &dev->vblank[crtc];
if (WARN_ON(atomic_read(&vblank->refcount) == 0))
return;
if (WARN_ON(crtc >= dev->num_crtcs))
return;
/* Last user schedules interrupt disable */
if (atomic_dec_and_test(&vblank->refcount)) {
if (drm_vblank_offdelay == 0)
return;
else if (dev->vblank_disable_immediate || drm_vblank_offdelay < 0)
vblank_disable_fn((unsigned long)vblank);
else
mod_timer(&vblank->disable_timer,
jiffies + ((drm_vblank_offdelay * HZ)/1000));
}
#endif
}
EXPORT_SYMBOL(drm_vblank_put);
/**
* drm_crtc_vblank_put - give up ownership of vblank events
* @crtc: which counter to give up
*
* Release ownership of a given vblank counter, turning off interrupts
* if possible. Disable interrupts after drm_vblank_offdelay milliseconds.
*
* This is the native kms version of drm_vblank_put().
*/
void drm_crtc_vblank_put(struct drm_crtc *crtc)
{
drm_vblank_put(crtc->dev, drm_crtc_index(crtc));
}
EXPORT_SYMBOL(drm_crtc_vblank_put);
/**
* drm_wait_one_vblank - wait for one vblank
* @dev: DRM device
* @crtc: crtc index
*
* This waits for one vblank to pass on @crtc, using the irq driver interfaces.
* It is a failure to call this when the vblank irq for @crtc is disabled, e.g.
* due to lack of driver support or because the crtc is off.
*/
void drm_wait_one_vblank(struct drm_device *dev, int crtc)
{
#if 0
int ret;
u32 last;
ret = drm_vblank_get(dev, crtc);
if (WARN(ret, "vblank not available on crtc %i, ret=%i\n", crtc, ret))
return;
last = drm_vblank_count(dev, crtc);
ret = wait_event_timeout(dev->vblank[crtc].queue,
last != drm_vblank_count(dev, crtc),
msecs_to_jiffies(100));
WARN(ret == 0, "vblank wait timed out on crtc %i\n", crtc);
drm_vblank_put(dev, crtc);
#endif
}
EXPORT_SYMBOL(drm_wait_one_vblank);
/**
* drm_crtc_wait_one_vblank - wait for one vblank
* @crtc: DRM crtc
*
* This waits for one vblank to pass on @crtc, using the irq driver interfaces.
* It is a failure to call this when the vblank irq for @crtc is disabled, e.g.
* due to lack of driver support or because the crtc is off.
*/
void drm_crtc_wait_one_vblank(struct drm_crtc *crtc)
{
drm_wait_one_vblank(crtc->dev, drm_crtc_index(crtc));
}
EXPORT_SYMBOL(drm_crtc_wait_one_vblank);
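drm_crtc_wait_one_vblank() packages the usual pin/wait/unpin dance, so a driver only needs the one call. A sketch of a disable path using it; my_hw_stop_scanout() is a hypothetical driver hook:

static void my_crtc_disable(struct drm_crtc *crtc)
{
	/* Only valid while the vblank irq for this crtc is still enabled:
	 * let the last frame finish scanning out before cutting clocks. */
	drm_crtc_wait_one_vblank(crtc);

	my_hw_stop_scanout(crtc);
}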
/**
* drm_vblank_off - disable vblank events on a CRTC
* @dev: DRM device
@ -360,6 +521,7 @@ EXPORT_SYMBOL(drm_calc_vbltimestamp_from_scanoutpos);
*/
void drm_vblank_off(struct drm_device *dev, int crtc)
{
struct drm_vblank_crtc *vblank = &dev->vblank[crtc];
struct drm_pending_vblank_event *e, *t;
struct timeval now;
unsigned long irqflags;
@ -395,13 +557,14 @@ EXPORT_SYMBOL(drm_crtc_vblank_off);
*
* This function restores the vblank interrupt state captured with
* drm_vblank_off() again. Note that calls to drm_vblank_on() and
* drm_vblank_off() can be unbalanced and so can also be unconditionaly called
* drm_vblank_off() can be unbalanced and so can also be unconditionally called
* in driver load code to reflect the current hardware state of the crtc.
*
* This is the legacy version of drm_crtc_vblank_on().
*/
void drm_vblank_on(struct drm_device *dev, int crtc)
{
struct drm_vblank_crtc *vblank = &dev->vblank[crtc];
unsigned long irqflags;
}
@ -413,7 +576,7 @@ EXPORT_SYMBOL(drm_vblank_on);
*
* This function restores the vblank interrupt state captured with
* drm_vblank_off() again. Note that calls to drm_vblank_on() and
* drm_vblank_off() can be unbalanced and so can also be unconditionaly called
* drm_vblank_off() can be unbalanced and so can also be unconditionally called
* in driver load code to reflect the current hardware state of the crtc.
*
* This is the native kms version of drm_vblank_on().
@ -453,6 +616,10 @@ void drm_vblank_pre_modeset(struct drm_device *dev, int crtc)
/* vblank is not initialized (IRQ not installed ?) */
if (!dev->num_crtcs)
return;
if (WARN_ON(crtc >= dev->num_crtcs))
return;
/*
* To avoid all the problems that might happen if interrupts
* were enabled/disabled around or between these calls, we just
@ -460,10 +627,10 @@ void drm_vblank_pre_modeset(struct drm_device *dev, int crtc)
* to avoid corrupting the count if multiple, mismatch calls occur),
* so that interrupts remain enabled in the interim.
*/
if (!dev->vblank[crtc].inmodeset) {
dev->vblank[crtc].inmodeset = 0x1;
if (!vblank->inmodeset) {
vblank->inmodeset = 0x1;
if (drm_vblank_get(dev, crtc) == 0)
dev->vblank[crtc].inmodeset |= 0x2;
vblank->inmodeset |= 0x2;
}
#endif
}
@ -486,16 +653,18 @@ void drm_vblank_post_modeset(struct drm_device *dev, int crtc)
if (!dev->num_crtcs)
return;
if (dev->vblank[crtc].inmodeset) {
if (vblank->inmodeset) {
spin_lock_irqsave(&dev->vbl_lock, irqflags);
dev->vblank_disable_allowed = true;
spin_unlock_irqrestore(&dev->vbl_lock, irqflags);
if (dev->vblank[crtc].inmodeset & 0x2)
if (vblank->inmodeset & 0x2)
drm_vblank_put(dev, crtc);
dev->vblank[crtc].inmodeset = 0;
vblank->inmodeset = 0;
}
#endif
}
EXPORT_SYMBOL(drm_vblank_post_modeset);
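In this port the pre/post bodies are stubbed out under #if 0, but the upstream usage pattern is the classic bracket around a legacy (UMS-style) mode switch; my_hw_program_mode() is a hypothetical driver hook:

static void my_legacy_mode_switch(struct drm_device *dev, int pipe)
{
	/* Hide vblank counter drift across the modeset from userspace. */
	drm_vblank_pre_modeset(dev, pipe);
	my_hw_program_mode(dev, pipe);
	drm_vblank_post_modeset(dev, pipe);
}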

View File

@ -0,0 +1,113 @@
#ifndef __DRM_LEGACY_H__
#define __DRM_LEGACY_H__
/*
* Copyright (c) 2014 David Herrmann <dh.herrmann@gmail.com>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
/*
* This file contains legacy interfaces that modern drm drivers
* should no longer be using. They cannot be removed as legacy
* drivers use them, and removing them would be an API break.
*/
#include <linux/list.h>
#include <drm/drm_legacy.h>
struct agp_memory;
struct drm_device;
struct drm_file;
/*
* Generic DRM Contexts
*/
#define DRM_KERNEL_CONTEXT 0
#define DRM_RESERVED_CONTEXTS 1
int drm_legacy_ctxbitmap_init(struct drm_device *dev);
void drm_legacy_ctxbitmap_cleanup(struct drm_device *dev);
void drm_legacy_ctxbitmap_free(struct drm_device *dev, int ctx_handle);
void drm_legacy_ctxbitmap_flush(struct drm_device *dev, struct drm_file *file);
int drm_legacy_resctx(struct drm_device *d, void *v, struct drm_file *f);
int drm_legacy_addctx(struct drm_device *d, void *v, struct drm_file *f);
int drm_legacy_getctx(struct drm_device *d, void *v, struct drm_file *f);
int drm_legacy_switchctx(struct drm_device *d, void *v, struct drm_file *f);
int drm_legacy_newctx(struct drm_device *d, void *v, struct drm_file *f);
int drm_legacy_rmctx(struct drm_device *d, void *v, struct drm_file *f);
int drm_legacy_setsareactx(struct drm_device *d, void *v, struct drm_file *f);
int drm_legacy_getsareactx(struct drm_device *d, void *v, struct drm_file *f);
/*
* Generic Buffer Management
*/
#define DRM_MAP_HASH_OFFSET 0x10000000
int drm_legacy_addmap_ioctl(struct drm_device *d, void *v, struct drm_file *f);
int drm_legacy_rmmap_ioctl(struct drm_device *d, void *v, struct drm_file *f);
int drm_legacy_addbufs(struct drm_device *d, void *v, struct drm_file *f);
int drm_legacy_infobufs(struct drm_device *d, void *v, struct drm_file *f);
int drm_legacy_markbufs(struct drm_device *d, void *v, struct drm_file *f);
int drm_legacy_freebufs(struct drm_device *d, void *v, struct drm_file *f);
int drm_legacy_mapbufs(struct drm_device *d, void *v, struct drm_file *f);
int drm_legacy_dma_ioctl(struct drm_device *d, void *v, struct drm_file *f);
void drm_legacy_vma_flush(struct drm_device *d);
/*
* AGP Support
*/
struct drm_agp_mem {
unsigned long handle;
struct agp_memory *memory;
unsigned long bound;
int pages;
struct list_head head;
};
/*
* Generic Userspace Locking-API
*/
int drm_legacy_i_have_hw_lock(struct drm_device *d, struct drm_file *f);
int drm_legacy_lock(struct drm_device *d, void *v, struct drm_file *f);
int drm_legacy_unlock(struct drm_device *d, void *v, struct drm_file *f);
int drm_legacy_lock_free(struct drm_lock_data *lock, unsigned int ctx);
/* DMA support */
int drm_legacy_dma_setup(struct drm_device *dev);
void drm_legacy_dma_takedown(struct drm_device *dev);
void drm_legacy_free_buffer(struct drm_device *dev,
struct drm_buf * buf);
void drm_legacy_reclaim_buffers(struct drm_device *dev,
struct drm_file *filp);
/* Scatter Gather Support */
void drm_legacy_sg_cleanup(struct drm_device *dev);
int drm_legacy_sg_alloc(struct drm_device *dev, void *data,
struct drm_file *file_priv);
int drm_legacy_sg_free(struct drm_device *dev, void *data,
struct drm_file *file_priv);
#endif /* __DRM_LEGACY_H__ */

View File

@ -912,7 +912,7 @@ EXPORT_SYMBOL(drm_mode_equal_no_clocks_no_stereo);
*
* This function is a helper which can be used to validate modes against size
* limitations of the DRM device/connector. If a mode is too big its status
* memeber is updated with the appropriate validation failure code. The list
* member is updated with the appropriate validation failure code. The list
* itself is not changed.
*/
void drm_mode_validate_size(struct drm_device *dev,

View File

@ -35,7 +35,7 @@
* of extra utility/tracking out of our acquire-ctx. This is provided
* by drm_modeset_lock / drm_modeset_acquire_ctx.
*
* For basic principles of ww_mutex, see: Documentation/ww-mutex-design.txt
* For basic principles of ww_mutex, see: Documentation/locking/ww-mutex-design.txt
*
* The basic usage pattern is to:
*
@ -56,6 +56,230 @@
*/
/**
* __drm_modeset_lock_all - internal helper to grab all modeset locks
* @dev: DRM device
* @trylock: trylock mode for atomic contexts
*
* This is a special version of drm_modeset_lock_all() which can also be used in
* atomic contexts. Then @trylock must be set to true.
*
* Returns:
* 0 on success or negative error code on failure.
*/
int __drm_modeset_lock_all(struct drm_device *dev,
bool trylock)
{
struct drm_mode_config *config = &dev->mode_config;
struct drm_modeset_acquire_ctx *ctx;
int ret;
ctx = kzalloc(sizeof(*ctx),
trylock ? GFP_ATOMIC : GFP_KERNEL);
if (!ctx)
return -ENOMEM;
if (trylock) {
if (!mutex_trylock(&config->mutex))
return -EBUSY;
} else {
mutex_lock(&config->mutex);
}
drm_modeset_acquire_init(ctx, 0);
ctx->trylock_only = trylock;
retry:
ret = drm_modeset_lock(&config->connection_mutex, ctx);
if (ret)
goto fail;
ret = drm_modeset_lock_all_crtcs(dev, ctx);
if (ret)
goto fail;
WARN_ON(config->acquire_ctx);
/* now we hold the locks, so it is safe to stash the
* ctx for drm_modeset_unlock_all():
*/
config->acquire_ctx = ctx;
drm_warn_on_modeset_not_all_locked(dev);
return 0;
fail:
if (ret == -EDEADLK) {
drm_modeset_backoff(ctx);
goto retry;
}
return ret;
}
EXPORT_SYMBOL(__drm_modeset_lock_all);
/**
* drm_modeset_lock_all - take all modeset locks
* @dev: drm device
*
* This function takes all modeset locks, suitable where a more fine-grained
* scheme isn't (yet) implemented. Locks must be dropped with
* drm_modeset_unlock_all.
*/
void drm_modeset_lock_all(struct drm_device *dev)
{
WARN_ON(__drm_modeset_lock_all(dev, false) != 0);
}
EXPORT_SYMBOL(drm_modeset_lock_all);
/**
* drm_modeset_unlock_all - drop all modeset locks
* @dev: device
*
* This function drops all modeset locks taken by drm_modeset_lock_all().
*/
void drm_modeset_unlock_all(struct drm_device *dev)
{
struct drm_mode_config *config = &dev->mode_config;
struct drm_modeset_acquire_ctx *ctx = config->acquire_ctx;
if (WARN_ON(!ctx))
return;
config->acquire_ctx = NULL;
drm_modeset_drop_locks(ctx);
drm_modeset_acquire_fini(ctx);
kfree(ctx);
mutex_unlock(&dev->mode_config.mutex);
}
EXPORT_SYMBOL(drm_modeset_unlock_all);
/**
* drm_modeset_lock_crtc - lock crtc with hidden acquire ctx for a plane update
* @crtc: DRM CRTC
* @plane: DRM plane to be updated on @crtc
*
* This function locks the given crtc and plane (which should be either the
* primary or cursor plane) using a hidden acquire context. This is necessary so
* that drivers internally using the atomic interfaces can grab further locks
* with the lock acquire context.
*
* Note that @plane can be NULL, e.g. when the cursor support hasn't yet been
* converted to universal planes.
*/
void drm_modeset_lock_crtc(struct drm_crtc *crtc,
struct drm_plane *plane)
{
struct drm_modeset_acquire_ctx *ctx;
int ret;
ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
if (WARN_ON(!ctx))
return;
drm_modeset_acquire_init(ctx, 0);
retry:
ret = drm_modeset_lock(&crtc->mutex, ctx);
if (ret)
goto fail;
if (plane) {
ret = drm_modeset_lock(&plane->mutex, ctx);
if (ret)
goto fail;
if (plane->crtc) {
ret = drm_modeset_lock(&plane->crtc->mutex, ctx);
if (ret)
goto fail;
}
}
WARN_ON(crtc->acquire_ctx);
/* now we hold the locks, so it is safe to stash the
* ctx for drm_modeset_unlock_crtc():
*/
crtc->acquire_ctx = ctx;
return;
fail:
if (ret == -EDEADLK) {
drm_modeset_backoff(ctx);
goto retry;
}
}
EXPORT_SYMBOL(drm_modeset_lock_crtc);
/**
* drm_modeset_legacy_acquire_ctx - find acquire ctx for legacy ioctls
* @crtc: drm crtc
*
* Legacy ioctl operations like cursor updates or page flips only have per-crtc
* locking, and store the acquire ctx in the corresponding crtc. All other
* legacy operations take all locks and use a global acquire context. This
* function grabs the right one.
*/
struct drm_modeset_acquire_ctx *
drm_modeset_legacy_acquire_ctx(struct drm_crtc *crtc)
{
if (crtc->acquire_ctx)
return crtc->acquire_ctx;
WARN_ON(!crtc->dev->mode_config.acquire_ctx);
return crtc->dev->mode_config.acquire_ctx;
}
EXPORT_SYMBOL(drm_modeset_legacy_acquire_ctx);
/**
* drm_modeset_unlock_crtc - drop crtc lock
* @crtc: drm crtc
*
* This drops the crtc lock acquired with drm_modeset_lock_crtc() and all other
* locks acquired through the hidden context.
*/
void drm_modeset_unlock_crtc(struct drm_crtc *crtc)
{
struct drm_modeset_acquire_ctx *ctx = crtc->acquire_ctx;
if (WARN_ON(!ctx))
return;
crtc->acquire_ctx = NULL;
drm_modeset_drop_locks(ctx);
drm_modeset_acquire_fini(ctx);
kfree(ctx);
}
EXPORT_SYMBOL(drm_modeset_unlock_crtc);
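A legacy per-crtc ioctl therefore brackets its work with the hidden-context pair rather than drm_modeset_lock_all(). A sketch of a cursor-update body; my_update_cursor() is a hypothetical driver call, and passing crtc->cursor assumes the driver already uses a universal cursor plane:

static int my_cursor_ioctl_body(struct drm_crtc *crtc, uint32_t handle)
{
	int ret;

	drm_modeset_lock_crtc(crtc, crtc->cursor);
	ret = my_update_cursor(crtc, handle);
	drm_modeset_unlock_crtc(crtc);

	return ret;
}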
/**
* drm_warn_on_modeset_not_all_locked - check that all modeset locks are locked
* @dev: device
*
* Useful as a debug assert.
*/
void drm_warn_on_modeset_not_all_locked(struct drm_device *dev)
{
struct drm_crtc *crtc;
/* Locking is currently fubar in the panic handler. */
// if (oops_in_progress)
// return;
list_for_each_entry(crtc, &dev->mode_config.crtc_list, head)
WARN_ON(!drm_modeset_is_locked(&crtc->mutex));
WARN_ON(!drm_modeset_is_locked(&dev->mode_config.connection_mutex));
WARN_ON(!mutex_is_locked(&dev->mode_config.mutex));
}
EXPORT_SYMBOL(drm_warn_on_modeset_not_all_locked);
/**
* drm_modeset_acquire_init - initialize acquire context
* @ctx: the acquire context
@ -108,7 +332,12 @@ static inline int modeset_lock(struct drm_modeset_lock *lock,
WARN_ON(ctx->contended);
if (interruptible && slow) {
if (ctx->trylock_only) {
if (!ww_mutex_trylock(&lock->mutex))
return -EBUSY;
else
return 0;
} else if (interruptible && slow) {
ret = ww_mutex_lock_slow_interruptible(&lock->mutex, &ctx->ww_ctx);
} else if (interruptible) {
ret = ww_mutex_lock_interruptible(&lock->mutex, &ctx->ww_ctx);
@ -226,15 +455,14 @@ void drm_modeset_unlock(struct drm_modeset_lock *lock)
}
EXPORT_SYMBOL(drm_modeset_unlock);
/* Temporary.. until we have sufficiently fine grained locking, there
* are a couple scenarios where it is convenient to grab all crtc locks.
* It is planned to remove this:
*/
/* In some legacy codepaths it's convenient to just grab all the crtc and plane
* related locks. */
int drm_modeset_lock_all_crtcs(struct drm_device *dev,
struct drm_modeset_acquire_ctx *ctx)
{
struct drm_mode_config *config = &dev->mode_config;
struct drm_crtc *crtc;
struct drm_plane *plane;
int ret = 0;
list_for_each_entry(crtc, &config->crtc_list, head) {
@ -243,6 +471,12 @@ int drm_modeset_lock_all_crtcs(struct drm_device *dev,
return ret;
}
list_for_each_entry(plane, &config->plane_list, head) {
ret = drm_modeset_lock(&plane->mutex, ctx);
if (ret)
return ret;
}
return 0;
}
EXPORT_SYMBOL(drm_modeset_lock_all_crtcs);

View File

@ -22,11 +22,12 @@
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
//#include <linux/pci.h>
//#include <linux/slab.h>
#include <linux/pci.h>
#include <linux/slab.h>
#include <linux/dma-mapping.h>
#include <linux/export.h>
#include <drm/drmP.h>
#include "drm_legacy.h"
#include <syscall.h>
/**

View File

@ -27,10 +27,38 @@
#include <drm/drmP.h>
#include <drm/drm_plane_helper.h>
#include <drm/drm_rect.h>
#include <drm/drm_atomic.h>
#include <drm/drm_crtc_helper.h>
#include <drm/drm_atomic_helper.h>
#define SUBPIXEL_MASK 0xffff
/**
* DOC: overview
*
* This helper library has two parts. The first part helps to implement
* primary plane support on top of the normal CRTC configuration interface.
* Since the legacy ->set_config interface ties the primary plane together with
* the CRTC state this does not allow userspace to disable the primary plane
* itself. To avoid too much duplicated code, use
* drm_plane_helper_check_update() which can be used to enforce the same
* restrictions as primary planes have traditionally had. The default primary
* plane only exposes XRGB8888 and ARGB8888 as valid pixel formats for the
* attached framebuffer.
*
* Drivers are highly recommended to implement proper support for primary
* planes, and newly merged drivers must not rely upon these transitional
* helpers.
*
* The second part also implements transitional helpers which allow drivers to
* gradually switch to the atomic helper infrastructure for plane updates. Once
* that switch is complete drivers shouldn't use these any longer, instead using
* the proper implementations of the legacy update and disable plane hooks
* provided by the atomic helpers.
*
* Again drivers are strongly urged to switch to the new interfaces.
*/
/*
* This is the minimal list of formats that seem to be safe for modeset use
* with all current DRM drivers. Most hardware can actually support more
@ -127,6 +155,11 @@ int drm_plane_helper_check_update(struct drm_plane *plane,
return -ERANGE;
}
if (!fb) {
*visible = false;
return 0;
}
*visible = drm_rect_clip_scaled(src, dest, clip, hscale, vscale);
if (!*visible)
/*
@ -369,3 +402,171 @@ int drm_crtc_init(struct drm_device *dev, struct drm_crtc *crtc,
return drm_crtc_init_with_planes(dev, crtc, primary, NULL, funcs);
}
EXPORT_SYMBOL(drm_crtc_init);
int drm_plane_helper_commit(struct drm_plane *plane,
struct drm_plane_state *plane_state,
struct drm_framebuffer *old_fb)
{
struct drm_plane_helper_funcs *plane_funcs;
struct drm_crtc *crtc[2];
struct drm_crtc_helper_funcs *crtc_funcs[2];
int i, ret = 0;
plane_funcs = plane->helper_private;
/* Since this is a transitional helper we can't assume that plane->state
* is always valid. Hence we need to use plane->crtc instead of
* plane->state->crtc as the old crtc. */
crtc[0] = plane->crtc;
crtc[1] = crtc[0] != plane_state->crtc ? plane_state->crtc : NULL;
for (i = 0; i < 2; i++)
crtc_funcs[i] = crtc[i] ? crtc[i]->helper_private : NULL;
if (plane_funcs->atomic_check) {
ret = plane_funcs->atomic_check(plane, plane_state);
if (ret)
goto out;
}
if (plane_funcs->prepare_fb && plane_state->fb) {
ret = plane_funcs->prepare_fb(plane, plane_state->fb);
if (ret)
goto out;
}
/* Point of no return, commit sw state. */
swap(plane->state, plane_state);
for (i = 0; i < 2; i++) {
if (crtc_funcs[i] && crtc_funcs[i]->atomic_begin)
crtc_funcs[i]->atomic_begin(crtc[i]);
}
plane_funcs->atomic_update(plane, plane_state);
for (i = 0; i < 2; i++) {
if (crtc_funcs[i] && crtc_funcs[i]->atomic_flush)
crtc_funcs[i]->atomic_flush(crtc[i]);
}
for (i = 0; i < 2; i++) {
if (!crtc[i])
continue;
/* There's no other way to figure out whether the crtc is running. */
ret = drm_crtc_vblank_get(crtc[i]);
if (ret == 0) {
drm_crtc_wait_one_vblank(crtc[i]);
drm_crtc_vblank_put(crtc[i]);
}
ret = 0;
}
if (plane_funcs->cleanup_fb && old_fb)
plane_funcs->cleanup_fb(plane, old_fb);
out:
if (plane_state) {
if (plane->funcs->atomic_destroy_state)
plane->funcs->atomic_destroy_state(plane, plane_state);
else
drm_atomic_helper_plane_destroy_state(plane, plane_state);
}
return ret;
}
/**
* drm_plane_helper_update() - Helper for primary plane update
* @plane: plane object to update
* @crtc: owning CRTC of owning plane
* @fb: framebuffer to flip onto plane
* @crtc_x: x offset of primary plane on crtc
* @crtc_y: y offset of primary plane on crtc
* @crtc_w: width of primary plane rectangle on crtc
* @crtc_h: height of primary plane rectangle on crtc
* @src_x: x offset of @fb for panning
* @src_y: y offset of @fb for panning
* @src_w: width of source rectangle in @fb
* @src_h: height of source rectangle in @fb
*
* Provides a default plane update handler using the atomic plane update
* functions. It is fully left to the driver to check plane constraints and
* handle corner-cases like a fully occluded or otherwise invisible plane.
*
* This is useful for piecewise transitioning of a driver to the atomic helpers.
*
* RETURNS:
* Zero on success, error code on failure
*/
int drm_plane_helper_update(struct drm_plane *plane, struct drm_crtc *crtc,
struct drm_framebuffer *fb,
int crtc_x, int crtc_y,
unsigned int crtc_w, unsigned int crtc_h,
uint32_t src_x, uint32_t src_y,
uint32_t src_w, uint32_t src_h)
{
struct drm_plane_state *plane_state;
if (plane->funcs->atomic_duplicate_state)
plane_state = plane->funcs->atomic_duplicate_state(plane);
else if (plane->state)
plane_state = drm_atomic_helper_plane_duplicate_state(plane);
else
plane_state = kzalloc(sizeof(*plane_state), GFP_KERNEL);
if (!plane_state)
return -ENOMEM;
plane_state->crtc = crtc;
drm_atomic_set_fb_for_plane(plane_state, fb);
plane_state->crtc_x = crtc_x;
plane_state->crtc_y = crtc_y;
plane_state->crtc_h = crtc_h;
plane_state->crtc_w = crtc_w;
plane_state->src_x = src_x;
plane_state->src_y = src_y;
plane_state->src_h = src_h;
plane_state->src_w = src_w;
return drm_plane_helper_commit(plane, plane_state, plane->fb);
}
EXPORT_SYMBOL(drm_plane_helper_update);
/**
* drm_plane_helper_disable() - Helper for primary plane disable
* @plane: plane to disable
*
* Provides a default plane disable handler using the atomic plane update
* functions. It is fully left to the driver to check plane constraints and
* handle corner-cases like a fully occluded or otherwise invisible plane.
*
* This is useful for piecewise transitioning of a driver to the atomic helpers.
*
* RETURNS:
* Zero on success, error code on failure
*/
int drm_plane_helper_disable(struct drm_plane *plane)
{
struct drm_plane_state *plane_state;
/* crtc helpers love to call disable functions for already disabled
 * hardware, so cope with that. */
if (!plane->crtc)
return 0;
if (plane->funcs->atomic_duplicate_state)
plane_state = plane->funcs->atomic_duplicate_state(plane);
else if (plane->state)
plane_state = drm_atomic_helper_plane_duplicate_state(plane);
else
plane_state = kzalloc(sizeof(*plane_state), GFP_KERNEL);
if (!plane_state)
return -ENOMEM;
plane_state->crtc = NULL;
drm_atomic_set_fb_for_plane(plane_state, NULL);
return drm_plane_helper_commit(plane, plane_state, plane->fb);
}
EXPORT_SYMBOL(drm_plane_helper_disable);
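Wiring the second, transitional part into a driver then looks like the sketch below: the legacy update/disable hooks point at the helpers, which in turn drive the atomic-style hooks the driver stores in plane->helper_private. The my_plane_* functions are hypothetical stubs:

static int my_plane_atomic_check(struct drm_plane *plane,
				 struct drm_plane_state *state)
{
	return 0;	/* hypothetical: accept every configuration */
}

static void my_plane_atomic_update(struct drm_plane *plane,
				   struct drm_plane_state *old_state)
{
	/* hypothetical: program the hardware from plane->state here */
}

static const struct drm_plane_helper_funcs my_plane_helper_funcs = {
	.atomic_check	= my_plane_atomic_check,
	.atomic_update	= my_plane_atomic_update,
};

static const struct drm_plane_funcs my_primary_plane_funcs = {
	.update_plane	= drm_plane_helper_update,
	.disable_plane	= drm_plane_helper_disable,
	.destroy	= drm_plane_cleanup,
};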

View File

@ -102,7 +102,8 @@ static int drm_helper_probe_single_connector_modes_merge_bits(struct drm_connect
mode->status = MODE_UNVERIFIED;
if (connector->force) {
if (connector->force == DRM_FORCE_ON)
if (connector->force == DRM_FORCE_ON ||
connector->force == DRM_FORCE_ON_DIGITAL)
connector->status = connector_status_connected;
else
connector->status = connector_status_disconnected;

View File

@ -33,11 +33,6 @@
#include <drm/drmP.h>
#include <drm/drm_core.h>
struct va_format {
const char *fmt;
va_list *va;
};
unsigned int drm_debug = 0; /* 1 to enable debug output */
EXPORT_SYMBOL(drm_debug);
@ -61,22 +56,21 @@ EXPORT_SYMBOL(drm_timestamp_precision);
unsigned int drm_timestamp_monotonic = 1;
struct idr drm_minors_idr;
int drm_err(const char *func, const char *format, ...)
void drm_err(const char *format, ...)
{
struct va_format vaf;
va_list args;
int r;
struct va_format vaf;
va_list args;
va_start(args, format);
va_start(args, format);
vaf.fmt = format;
vaf.va = &args;
vaf.fmt = format;
vaf.va = &args;
r = printk(KERN_ERR "[" DRM_NAME ":%s] *ERROR* %pV", func, &vaf);
printk(KERN_ERR "[" DRM_NAME ":%pf] *ERROR* %pV",
__builtin_return_address(0), &vaf);
va_end(args);
return r;
va_end(args);
}
EXPORT_SYMBOL(drm_err);
@ -561,10 +555,6 @@ int drm_order(unsigned long size)
extern int x86_clflush_size;
static inline void clflush(volatile void *__p)
{
asm volatile("clflush %0" : "+m" (*(volatile char*)__p));
}
void drm_clflush_virt_range(void *addr, unsigned long length)
{

View File

@ -12,25 +12,19 @@
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
MA 02110-1301 USA.
* ------------------------------------------------------------------------- */
/* With some changes from Frodo Looijaard <frodol@dds.nl>, Kyösti Mälkki
<kmalkki@cc.hut.fi> and Jean Delvare <khali@linux-fr.org> */
<kmalkki@cc.hut.fi> and Jean Delvare <jdelvare@suse.de> */
#include <types.h>
#include <list.h>
#include <linux/kernel.h>
#include <linux/spinlock.h>
#include <syscall.h>
#include <linux/jiffies.h>
#include <errno.h>
#include <linux/module.h>
#include <linux/delay.h>
#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/i2c.h>
#include <linux/i2c-algo-bit.h>
#include <syscall.h>
#define I2C_FUNC_NOSTART 0x00000010 /* I2C_M_NOSTART */
@ -49,11 +43,11 @@
/* ----- global variables --------------------------------------------- */
static int bit_test = 0; /* see if the line-setting functions work */
static int bit_test; /* see if the line-setting functions work */
MODULE_PARM_DESC(bit_test, "lines testing - 0 off; 1 report; 2 fail if stuck");
#ifdef DEBUG
static int i2c_debug = 1;
module_param(i2c_debug, int, S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(i2c_debug,
"debug level - 0 off; 1 normal; 2 verbose; 3 very verbose");
#endif
@ -97,14 +91,14 @@ static int sclhi(struct i2c_algo_bit_data *adap)
if (!adap->getscl)
goto done;
start = GetTimerTicks();
start = jiffies;
while (!getscl(adap)) {
/* This hw knows how to read the clock line, so we wait
* until it actually gets high. This is safer as some
* chips may hold it low ("clock stretching") while they
* are processing data internally.
*/
if (time_after(GetTimerTicks(), start + adap->timeout)) {
if (time_after(jiffies, start + adap->timeout)) {
/* Test one last time, as we may have been preempted
* between last check and timeout test.
*/
@ -112,8 +106,14 @@ static int sclhi(struct i2c_algo_bit_data *adap)
break;
return -ETIMEDOUT;
}
asm volatile("rep; nop" ::: "memory");
cpu_relax();
}
#ifdef DEBUG
if (jiffies != start && i2c_debug >= 3)
pr_debug("i2c-algo-bit: needed %ld jiffies for SCL to go "
"high\n", jiffies - start);
#endif
done:
udelay(adap->udelay);
return 0;
@ -650,3 +650,6 @@ int i2c_bit_add_bus(struct i2c_adapter *adap)
}
MODULE_AUTHOR("Simon G. Vogl <simon@tk.uni-linz.ac.at>");
MODULE_DESCRIPTION("I2C-Bus bit-banging algorithm");
MODULE_LICENSE("GPL");

View File

@ -27,43 +27,13 @@
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <list.h>
#include <errno.h>
#include <linux/list.h>
#include <linux/errno.h>
#include <linux/i2c.h>
#include <syscall.h>
#include <linux/jiffies.h>
#include <syscall.h>
@ -327,7 +297,7 @@ int i2c_transfer(struct i2c_adapter *adap, struct i2c_msg *msgs, int num)
if (ret != -EAGAIN)
break;
if (time_after(GetTimerTicks(), orig_jiffies + adap->timeout))
if (time_after((unsigned long)GetTimerTicks(), orig_jiffies + adap->timeout))
break;
delay(1);

View File

@ -1,21 +1,23 @@
CC = gcc
LD = ld
AS = as
FASM = fasm.exe
DEFINES = -D__KERNEL__ -DCONFIG_X86_32
DEFINES = -D__KERNEL__ -DCONFIG_X86_32 -DCONFIG_TINY_RCU -DCONFIG_X86_L1_CACHE_SHIFT=6
DEFINES += -DCONFIG_ARCH_HAS_CACHE_LINE_SIZE
DRV_TOPDIR = $(CURDIR)/../../..
DRM_TOPDIR = $(CURDIR)/..
DRV_INCLUDES = $(DRV_TOPDIR)/include
INCLUDES = -I$(DRV_INCLUDES) -I$(DRV_INCLUDES)/drm \
-I$(DRV_INCLUDES)/linux
INCLUDES = -I$(DRV_INCLUDES) \
-I$(DRV_INCLUDES)/asm \
-I$(DRV_INCLUDES)/uapi \
-I$(DRV_INCLUDES)/drm -I./ -I$(DRV_INCLUDES)
CFLAGS = -c -Os $(INCLUDES) $(DEFINES) -march=i686 -fomit-frame-pointer -fno-builtin-printf
CFLAGS= -c -O2 $(INCLUDES) $(DEFINES) -march=i686 -fno-ident -fomit-frame-pointer -fno-builtin-printf
CFLAGS+= -mno-stack-arg-probe -mpreferred-stack-boundary=2 -mincoming-stack-boundary=2 -mno-ms-bitfields
LIBPATH:= $(DRV_TOPDIR)/ddk
@ -30,7 +32,6 @@ NAME:= atikms
HFILES:= $(DRV_INCLUDES)/linux/types.h \
$(DRV_INCLUDES)/linux/list.h \
$(DRV_INCLUDES)/linux/pci.h \
$(DRV_INCLUDES)/drm/drm.h \
$(DRV_INCLUDES)/drm/drmP.h \
$(DRV_INCLUDES)/drm/drm_edid.h \
$(DRV_INCLUDES)/drm/drm_crtc.h \
@ -54,6 +55,7 @@ NAME_SRC= \
$(DRM_TOPDIR)/drm_crtc_helper.c \
$(DRM_TOPDIR)/drm_dp_helper.c \
$(DRM_TOPDIR)/drm_drv.c \
$(DRM_TOPDIR)/drm_atomic.c \
$(DRM_TOPDIR)/drm_edid.c \
$(DRM_TOPDIR)/drm_fb_helper.c \
$(DRM_TOPDIR)/drm_gem.c \
@ -123,6 +125,7 @@ NAME_SRC= \
radeon_ring.c \
radeon_sa.c \
radeon_semaphore.c \
radeon_sync.c \
radeon_test.c \
radeon_ttm.c \
radeon_ucode.c \
@ -139,7 +142,6 @@ NAME_SRC= \
rv740_dpm.c \
r520.c \
r600.c \
r600_audio.c \
r600_blit_shaders.c \
r600_cs.c \
r600_dma.c \


@ -5,15 +5,17 @@ LD = ld
AS = as
FASM = fasm
DEFINES = -D__KERNEL__ -DCONFIG_X86_32
DEFINES = -D__KERNEL__ -DCONFIG_X86_32 -DCONFIG_TINY_RCU -DCONFIG_X86_L1_CACHE_SHIFT=6
DEFINES += -DCONFIG_ARCH_HAS_CACHE_LINE_SIZE
DDK_TOPDIR = d:\kos\kolibri\drivers\ddk
DRV_INCLUDES = /d/kos/kolibri/drivers/include
DRM_TOPDIR = $(CURDIR)/..
INCLUDES = -I$(DRV_INCLUDES)/linux/uapi -I$(DRV_INCLUDES)/linux \
-I$(DRV_INCLUDES)/linux/asm -I$(DRV_INCLUDES)/drm \
-I./ -I$(DRV_INCLUDES)
INCLUDES = -I$(DRV_INCLUDES) \
-I$(DRV_INCLUDES)/asm \
-I$(DRV_INCLUDES)/uapi \
-I$(DRV_INCLUDES)/drm -I./ -I$(DRV_INCLUDES)
CFLAGS_OPT = -Os -march=i686 -fno-ident -fomit-frame-pointer -fno-builtin-printf -mno-ms-bitfields
CFLAGS_OPT+= -mno-stack-arg-probe -mpreferred-stack-boundary=2 -mincoming-stack-boundary=2 -flto
@ -34,7 +36,6 @@ NAME:= atikms
HFILES:= $(DRV_INCLUDES)/linux/types.h \
$(DRV_INCLUDES)/linux/list.h \
$(DRV_INCLUDES)/linux/pci.h \
$(DRV_INCLUDES)/drm/drm.h \
$(DRV_INCLUDES)/drm/drmP.h \
$(DRV_INCLUDES)/drm/drm_edid.h \
$(DRV_INCLUDES)/drm/drm_crtc.h \
@ -58,6 +59,7 @@ NAME_SRC= \
$(DRM_TOPDIR)/drm_crtc_helper.c \
$(DRM_TOPDIR)/drm_dp_helper.c \
$(DRM_TOPDIR)/drm_drv.c \
$(DRM_TOPDIR)/drm_atomic.c \
$(DRM_TOPDIR)/drm_edid.c \
$(DRM_TOPDIR)/drm_fb_helper.c \
$(DRM_TOPDIR)/drm_gem.c \
@ -127,6 +129,7 @@ NAME_SRC= \
radeon_ring.c \
radeon_sa.c \
radeon_semaphore.c \
radeon_sync.c \
radeon_test.c \
radeon_ttm.c \
radeon_ucode.c \
@ -143,7 +146,6 @@ NAME_SRC= \
rv740_dpm.c \
r520.c \
r600.c \
r600_audio.c \
r600_blit_shaders.c \
r600_cs.c \
r600_dma.c \


@ -1215,7 +1215,7 @@ free:
return ret;
}
int atom_execute_table(struct atom_context *ctx, int index, uint32_t * params)
int atom_execute_table_scratch_unlocked(struct atom_context *ctx, int index, uint32_t * params)
{
int r;
@ -1236,6 +1236,15 @@ int atom_execute_table(struct atom_context *ctx, int index, uint32_t * params)
return r;
}
int atom_execute_table(struct atom_context *ctx, int index, uint32_t * params)
{
int r;
mutex_lock(&ctx->scratch_mutex);
r = atom_execute_table_scratch_unlocked(ctx, index, params);
mutex_unlock(&ctx->scratch_mutex);
return r;
}
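
atom_execute_table() is now a thin locked wrapper: paths that must hold scratch_mutex across several steps (the AUX and hw-i2c changes later in this commit) call atom_execute_table_scratch_unlocked() directly, while everyone else keeps the old entry point. Sketch of the two calling conventions:

/* ordinary caller: the wrapper takes and drops scratch_mutex itself */
r = atom_execute_table(ctx, index, params);

/* caller that stages data in ctx->scratch around the call */
mutex_lock(&ctx->scratch_mutex);
/* ... copy request bytes into ctx->scratch ... */
r = atom_execute_table_scratch_unlocked(ctx, index, params);
/* ... read the reply back out of ctx->scratch ... */
mutex_unlock(&ctx->scratch_mutex);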
static int atom_iio_len[] = { 1, 2, 3, 3, 3, 3, 4, 4, 4, 3 };
static void atom_index_iio(struct atom_context *ctx, int base)
@ -1344,7 +1353,7 @@ int atom_asic_init(struct atom_context *ctx)
void atom_destroy(struct atom_context *ctx)
{
kfree(ctx->iio);
kfree(ctx->iio);
kfree(ctx);
}


@ -125,6 +125,7 @@ struct card_info {
struct atom_context {
struct card_info *card;
struct mutex mutex;
struct mutex scratch_mutex;
void *bios;
uint32_t cmd_table, data_table;
uint16_t *iio;
@ -145,6 +146,7 @@ extern int atom_debug;
struct atom_context *atom_parse(struct card_info *, void *);
int atom_execute_table(struct atom_context *, int, uint32_t *);
int atom_execute_table_scratch_unlocked(struct atom_context *, int, uint32_t *);
int atom_asic_init(struct atom_context *);
void atom_destroy(struct atom_context *);
bool atom_parse_data_header(struct atom_context *ctx, int index, uint16_t *size,


@ -2039,6 +2039,7 @@ int atombios_crtc_mode_set(struct drm_crtc *crtc,
atombios_crtc_set_base(crtc, x, y, old_fb);
atombios_overscan_setup(crtc, mode, adjusted_mode);
atombios_scaler_setup(crtc);
// radeon_cursor_reset(crtc);
/* update the hw version for dpm */
radeon_crtc->hw_mode = *adjusted_mode;


@ -100,6 +100,7 @@ static int radeon_process_aux_ch(struct radeon_i2c_chan *chan,
memset(&args, 0, sizeof(args));
mutex_lock(&chan->mutex);
mutex_lock(&rdev->mode_info.atom_context->scratch_mutex);
base = (unsigned char *)(rdev->mode_info.atom_context->scratch + 1);
@ -113,7 +114,7 @@ static int radeon_process_aux_ch(struct radeon_i2c_chan *chan,
if (ASIC_IS_DCE4(rdev))
args.v2.ucHPD_ID = chan->rec.hpd;
atom_execute_table(rdev->mode_info.atom_context, index, (uint32_t *)&args);
atom_execute_table_scratch_unlocked(rdev->mode_info.atom_context, index, (uint32_t *)&args);
*ack = args.v1.ucReplyStatus;
@ -147,6 +148,7 @@ static int radeon_process_aux_ch(struct radeon_i2c_chan *chan,
r = recv_bytes;
done:
mutex_unlock(&rdev->mode_info.atom_context->scratch_mutex);
mutex_unlock(&chan->mutex);
return r;
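
Lock ordering here is deliberate: chan->mutex is always taken before the ATOM context's scratch_mutex and released after it, and the hw-i2c path later in this commit follows the same order, so the two locks can never be acquired in opposite orders and deadlock. Schematically (ctx is rdev->mode_info.atom_context):

mutex_lock(&chan->mutex);		/* serialize the channel */
mutex_lock(&ctx->scratch_mutex);	/* own the shared scratch buffer */
atom_execute_table_scratch_unlocked(ctx, index, (uint32_t *)&args);
mutex_unlock(&ctx->scratch_mutex);
mutex_unlock(&chan->mutex);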
@ -232,8 +234,8 @@ void radeon_dp_aux_init(struct radeon_connector *radeon_connector)
/***** general DP utility functions *****/
#define DP_VOLTAGE_MAX DP_TRAIN_VOLTAGE_SWING_1200
#define DP_PRE_EMPHASIS_MAX DP_TRAIN_PRE_EMPHASIS_9_5
#define DP_VOLTAGE_MAX DP_TRAIN_VOLTAGE_SWING_LEVEL_3
#define DP_PRE_EMPHASIS_MAX DP_TRAIN_PRE_EMPH_LEVEL_3
static void dp_get_adjust_train(u8 link_status[DP_LINK_STATUS_SIZE],
int lane_count,


@ -291,29 +291,6 @@ static void radeon_atom_backlight_exit(struct radeon_encoder *encoder)
bool radeon_atom_get_tv_timings(struct radeon_device *rdev, int index,
struct drm_display_mode *mode);
static inline bool radeon_encoder_is_digital(struct drm_encoder *encoder)
{
struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
switch (radeon_encoder->encoder_id) {
case ENCODER_OBJECT_ID_INTERNAL_LVDS:
case ENCODER_OBJECT_ID_INTERNAL_TMDS1:
case ENCODER_OBJECT_ID_INTERNAL_KLDSCP_TMDS1:
case ENCODER_OBJECT_ID_INTERNAL_LVTM1:
case ENCODER_OBJECT_ID_INTERNAL_DVO1:
case ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DVO1:
case ENCODER_OBJECT_ID_INTERNAL_DDI:
case ENCODER_OBJECT_ID_INTERNAL_UNIPHY:
case ENCODER_OBJECT_ID_INTERNAL_KLDSCP_LVTMA:
case ENCODER_OBJECT_ID_INTERNAL_UNIPHY1:
case ENCODER_OBJECT_ID_INTERNAL_UNIPHY2:
case ENCODER_OBJECT_ID_INTERNAL_UNIPHY3:
return true;
default:
return false;
}
}
static bool radeon_atom_mode_fixup(struct drm_encoder *encoder,
const struct drm_display_mode *mode,
struct drm_display_mode *adjusted_mode)


@ -48,6 +48,7 @@ static int radeon_process_i2c_ch(struct radeon_i2c_chan *chan,
memset(&args, 0, sizeof(args));
mutex_lock(&chan->mutex);
mutex_lock(&rdev->mode_info.atom_context->scratch_mutex);
base = (unsigned char *)rdev->mode_info.atom_context->scratch;
@ -82,7 +83,7 @@ static int radeon_process_i2c_ch(struct radeon_i2c_chan *chan,
args.ucSlaveAddr = slave_addr << 1;
args.ucLineNumber = chan->rec.i2c_id;
atom_execute_table(rdev->mode_info.atom_context, index, (uint32_t *)&args);
atom_execute_table_scratch_unlocked(rdev->mode_info.atom_context, index, (uint32_t *)&args);
/* error */
if (args.ucStatus != HW_ASSISTED_I2C_STATUS_SUCCESS) {
@ -95,6 +96,7 @@ static int radeon_process_i2c_ch(struct radeon_i2c_chan *chan,
radeon_atom_copy_swap(buf, base, num, false);
done:
mutex_unlock(&rdev->mode_info.atom_context->scratch_mutex);
mutex_unlock(&chan->mutex);
return r;


@ -24,6 +24,7 @@
#include "drmP.h"
#include "radeon.h"
#include "radeon_asic.h"
#include "btcd.h"
#include "r600_dpm.h"
#include "cypress_dpm.h"
@ -2099,7 +2100,6 @@ static void btc_apply_state_adjust_rules(struct radeon_device *rdev,
bool disable_mclk_switching;
u32 mclk, sclk;
u16 vddc, vddci;
u32 max_sclk_vddc, max_mclk_vddci, max_mclk_vddc;
if ((rdev->pm.dpm.new_active_crtc_count > 1) ||
btc_dpm_vblank_too_short(rdev))
@ -2141,39 +2141,6 @@ static void btc_apply_state_adjust_rules(struct radeon_device *rdev,
ps->low.vddci = max_limits->vddci;
}
/* limit clocks to max supported clocks based on voltage dependency tables */
btc_get_max_clock_from_voltage_dependency_table(&rdev->pm.dpm.dyn_state.vddc_dependency_on_sclk,
&max_sclk_vddc);
btc_get_max_clock_from_voltage_dependency_table(&rdev->pm.dpm.dyn_state.vddci_dependency_on_mclk,
&max_mclk_vddci);
btc_get_max_clock_from_voltage_dependency_table(&rdev->pm.dpm.dyn_state.vddc_dependency_on_mclk,
&max_mclk_vddc);
if (max_sclk_vddc) {
if (ps->low.sclk > max_sclk_vddc)
ps->low.sclk = max_sclk_vddc;
if (ps->medium.sclk > max_sclk_vddc)
ps->medium.sclk = max_sclk_vddc;
if (ps->high.sclk > max_sclk_vddc)
ps->high.sclk = max_sclk_vddc;
}
if (max_mclk_vddci) {
if (ps->low.mclk > max_mclk_vddci)
ps->low.mclk = max_mclk_vddci;
if (ps->medium.mclk > max_mclk_vddci)
ps->medium.mclk = max_mclk_vddci;
if (ps->high.mclk > max_mclk_vddci)
ps->high.mclk = max_mclk_vddci;
}
if (max_mclk_vddc) {
if (ps->low.mclk > max_mclk_vddc)
ps->low.mclk = max_mclk_vddc;
if (ps->medium.mclk > max_mclk_vddc)
ps->medium.mclk = max_mclk_vddc;
if (ps->high.mclk > max_mclk_vddc)
ps->high.mclk = max_mclk_vddc;
}
/* XXX validate the min clocks required for display */
if (disable_mclk_switching) {

File diff suppressed because it is too large


@ -33,6 +33,8 @@
#define CISLANDS_MAX_HARDWARE_POWERLEVELS 2
#define CISLANDS_UNUSED_GPIO_PIN 0x7F
struct ci_pl {
u32 mclk;
u32 sclk;
@ -237,6 +239,7 @@ struct ci_power_info {
u32 sclk_dpm_key_disabled;
u32 mclk_dpm_key_disabled;
u32 pcie_dpm_key_disabled;
u32 thermal_sclk_dpm_enabled;
struct ci_pcie_perf_range pcie_gen_performance;
struct ci_pcie_perf_range pcie_lane_performance;
struct ci_pcie_perf_range pcie_gen_powersaving;
@ -264,6 +267,7 @@ struct ci_power_info {
bool caps_automatic_dc_transition;
bool caps_sclk_throttle_low_notification;
bool caps_dynamic_ac_timing;
bool caps_od_fuzzy_fan_control_support;
/* flags */
bool thermal_protection;
bool pcie_performance_request;
@ -285,6 +289,10 @@ struct ci_power_info {
struct ci_ps current_ps;
struct radeon_ps requested_rps;
struct ci_ps requested_ps;
/* fan control */
bool fan_ctrl_is_in_default_mode;
u32 t_min;
u32 fan_ctrl_default_mode;
};
#define CISLANDS_VOLTAGE_CONTROL_NONE 0x0


@ -129,7 +129,7 @@ void ci_reset_smc(struct radeon_device *rdev)
int ci_program_jump_on_start(struct radeon_device *rdev)
{
static u8 data[] = { 0xE0, 0x00, 0x80, 0x40 };
static const u8 data[] = { 0xE0, 0x00, 0x80, 0x40 };
return ci_copy_bytes_to_smc(rdev, 0x0, data, 4, sizeof(data)+1);
}


@ -32,6 +32,7 @@
#include "cik_blit_shaders.h"
#include "radeon_ucode.h"
#include "clearstate_ci.h"
#include "radeon_kfd.h"
MODULE_FIRMWARE("radeon/BONAIRE_pfp.bin");
MODULE_FIRMWARE("radeon/BONAIRE_me.bin");
@ -1563,6 +1564,8 @@ static const u32 godavari_golden_registers[] =
static void cik_init_golden_registers(struct radeon_device *rdev)
{
/* Some of the registers might be dependent on GRBM_GFX_INDEX */
mutex_lock(&rdev->grbm_idx_mutex);
switch (rdev->family) {
case CHIP_BONAIRE:
radeon_program_register_sequence(rdev,
@ -1637,6 +1640,7 @@ static void cik_init_golden_registers(struct radeon_device *rdev)
default:
break;
}
mutex_unlock(&rdev->grbm_idx_mutex);
}
/**
@ -1806,7 +1810,7 @@ int ci_mc_load_microcode(struct radeon_device *rdev)
{
const __be32 *fw_data = NULL;
const __le32 *new_fw_data = NULL;
u32 running, blackout = 0;
u32 running, blackout = 0, tmp;
u32 *io_mc_regs = NULL;
const __le32 *new_io_mc_regs = NULL;
int i, regs_size, ucode_size;
@ -1866,6 +1870,15 @@ int ci_mc_load_microcode(struct radeon_device *rdev)
WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
}
}
tmp = RREG32(MC_SEQ_MISC0);
if ((rdev->pdev->device == 0x6649) && ((tmp & 0xff00) == 0x5600)) {
WREG32(MC_SEQ_IO_DEBUG_INDEX, 5);
WREG32(MC_SEQ_IO_DEBUG_DATA, 0x00000023);
WREG32(MC_SEQ_IO_DEBUG_INDEX, 9);
WREG32(MC_SEQ_IO_DEBUG_DATA, 0x000001f0);
}
/* load the MC ucode */
for (i = 0; i < ucode_size; i++) {
if (rdev->new_fw)
@ -3419,6 +3432,7 @@ static void cik_setup_rb(struct radeon_device *rdev,
u32 disabled_rbs = 0;
u32 enabled_rbs = 0;
mutex_lock(&rdev->grbm_idx_mutex);
for (i = 0; i < se_num; i++) {
for (j = 0; j < sh_per_se; j++) {
cik_select_se_sh(rdev, i, j);
@ -3430,6 +3444,7 @@ static void cik_setup_rb(struct radeon_device *rdev,
}
}
cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
mutex_unlock(&rdev->grbm_idx_mutex);
mask = 1;
for (i = 0; i < max_rb_num_per_se * se_num; i++) {
@ -3440,6 +3455,7 @@ static void cik_setup_rb(struct radeon_device *rdev,
rdev->config.cik.backend_enable_mask = enabled_rbs;
mutex_lock(&rdev->grbm_idx_mutex);
for (i = 0; i < se_num; i++) {
cik_select_se_sh(rdev, i, 0xffffffff);
data = 0;
@ -3467,6 +3483,7 @@ static void cik_setup_rb(struct radeon_device *rdev,
WREG32(PA_SC_RASTER_CONFIG, data);
}
cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
mutex_unlock(&rdev->grbm_idx_mutex);
}
/**
@ -3684,6 +3701,12 @@ static void cik_gpu_init(struct radeon_device *rdev)
/* set HW defaults for 3D engine */
WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));
mutex_lock(&rdev->grbm_idx_mutex);
/*
* make sure that the following register writes are broadcast
* to all the shaders
*/
cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
WREG32(SX_DEBUG_1, 0x20);
WREG32(TA_CNTL_AUX, 0x00010000);
@ -3739,6 +3762,7 @@ static void cik_gpu_init(struct radeon_device *rdev)
WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
WREG32(PA_SC_ENHANCE, ENABLE_PA_SC_OUT_OF_ORDER);
mutex_unlock(&rdev->grbm_idx_mutex);
udelay(50);
}
@ -3959,41 +3983,38 @@ bool cik_semaphore_ring_emit(struct radeon_device *rdev,
* @src_offset: src GPU address
* @dst_offset: dst GPU address
* @num_gpu_pages: number of GPU pages to xfer
* @fence: radeon fence object
* @resv: reservation object to sync to
*
* Copy GPU paging using the CP DMA engine (CIK+).
* Used by the radeon ttm implementation to move pages if
* registered as the asic copy callback.
*/
int cik_copy_cpdma(struct radeon_device *rdev,
struct radeon_fence *cik_copy_cpdma(struct radeon_device *rdev,
uint64_t src_offset, uint64_t dst_offset,
unsigned num_gpu_pages,
struct radeon_fence **fence)
struct reservation_object *resv)
{
struct radeon_semaphore *sem = NULL;
struct radeon_fence *fence;
struct radeon_sync sync;
int ring_index = rdev->asic->copy.blit_ring_index;
struct radeon_ring *ring = &rdev->ring[ring_index];
u32 size_in_bytes, cur_size_in_bytes, control;
int i, num_loops;
int r = 0;
r = radeon_semaphore_create(rdev, &sem);
if (r) {
DRM_ERROR("radeon: moving bo (%d).\n", r);
return r;
}
radeon_sync_create(&sync);
size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
num_loops = DIV_ROUND_UP(size_in_bytes, 0x1fffff);
r = radeon_ring_lock(rdev, ring, num_loops * 7 + 18);
if (r) {
DRM_ERROR("radeon: moving bo (%d).\n", r);
radeon_semaphore_free(rdev, &sem, NULL);
return r;
radeon_sync_free(rdev, &sync, NULL);
return ERR_PTR(r);
}
radeon_semaphore_sync_to(sem, *fence);
radeon_semaphore_sync_rings(rdev, sem, ring->idx);
radeon_sync_resv(rdev, &sync, resv, false);
radeon_sync_rings(rdev, &sync, ring->idx);
for (i = 0; i < num_loops; i++) {
cur_size_in_bytes = size_in_bytes;
@ -4014,17 +4035,17 @@ int cik_copy_cpdma(struct radeon_device *rdev,
dst_offset += cur_size_in_bytes;
}
r = radeon_fence_emit(rdev, fence, ring->idx);
r = radeon_fence_emit(rdev, &fence, ring->idx);
if (r) {
radeon_ring_unlock_undo(rdev, ring);
radeon_semaphore_free(rdev, &sem, NULL);
return r;
radeon_sync_free(rdev, &sync, NULL);
return ERR_PTR(r);
}
radeon_ring_unlock_commit(rdev, ring, false);
radeon_semaphore_free(rdev, &sem, *fence);
radeon_sync_free(rdev, &sync, fence);
return r;
return fence;
}
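
The copy callbacks now hand the fence back (errors encoded with ERR_PTR()) instead of filling a caller-supplied pointer, and the radeon_semaphore dance is replaced by a radeon_sync that can also sync to a reservation object. A caller of the asic copy hook therefore looks roughly like this sketch (src, dst, num_pages and resv are assumed inputs):

struct radeon_fence *fence;
int r;

fence = rdev->asic->copy.copy(rdev, src, dst, num_pages, resv);
if (IS_ERR(fence))
	return PTR_ERR(fence);	/* ring lock or fence emit failed */
r = radeon_fence_wait(fence, false);
radeon_fence_unref(&fence);
return r;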
/*
@ -4045,6 +4066,7 @@ int cik_copy_cpdma(struct radeon_device *rdev,
void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
{
struct radeon_ring *ring = &rdev->ring[ib->ring];
unsigned vm_id = ib->vm ? ib->vm->ids[ib->ring].id : 0;
u32 header, control = INDIRECT_BUFFER_VALID;
if (ib->is_const_ib) {
@ -4073,8 +4095,7 @@ void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
}
control |= ib->length_dw |
(ib->vm ? (ib->vm->id << 24) : 0);
control |= ib->length_dw | (vm_id << 24);
radeon_ring_write(ring, header);
radeon_ring_write(ring,
@ -4234,7 +4255,7 @@ static int cik_cp_gfx_load_microcode(struct radeon_device *rdev)
WREG32(CP_PFP_UCODE_ADDR, 0);
for (i = 0; i < fw_size; i++)
WREG32(CP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
WREG32(CP_PFP_UCODE_ADDR, 0);
WREG32(CP_PFP_UCODE_ADDR, le32_to_cpu(pfp_hdr->header.ucode_version));
/* CE */
fw_data = (const __le32 *)
@ -4243,7 +4264,7 @@ static int cik_cp_gfx_load_microcode(struct radeon_device *rdev)
WREG32(CP_CE_UCODE_ADDR, 0);
for (i = 0; i < fw_size; i++)
WREG32(CP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
WREG32(CP_CE_UCODE_ADDR, 0);
WREG32(CP_CE_UCODE_ADDR, le32_to_cpu(ce_hdr->header.ucode_version));
/* ME */
fw_data = (const __be32 *)
@ -4252,7 +4273,8 @@ static int cik_cp_gfx_load_microcode(struct radeon_device *rdev)
WREG32(CP_ME_RAM_WADDR, 0);
for (i = 0; i < fw_size; i++)
WREG32(CP_ME_RAM_DATA, le32_to_cpup(fw_data++));
WREG32(CP_ME_RAM_WADDR, 0);
WREG32(CP_ME_RAM_WADDR, le32_to_cpu(me_hdr->header.ucode_version));
WREG32(CP_ME_RAM_RADDR, le32_to_cpu(me_hdr->header.ucode_version));
} else {
const __be32 *fw_data;
@ -4278,10 +4300,6 @@ static int cik_cp_gfx_load_microcode(struct radeon_device *rdev)
WREG32(CP_ME_RAM_WADDR, 0);
}
WREG32(CP_PFP_UCODE_ADDR, 0);
WREG32(CP_CE_UCODE_ADDR, 0);
WREG32(CP_ME_RAM_WADDR, 0);
WREG32(CP_ME_RAM_RADDR, 0);
return 0;
}
@ -4315,8 +4333,8 @@ static int cik_cp_gfx_start(struct radeon_device *rdev)
/* init the CE partitions. CE only used for gfx on CIK */
radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
radeon_ring_write(ring, 0xc000);
radeon_ring_write(ring, 0xc000);
radeon_ring_write(ring, 0x8000);
radeon_ring_write(ring, 0x8000);
/* setup clear context state */
radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
@ -4563,7 +4581,7 @@ static int cik_cp_compute_load_microcode(struct radeon_device *rdev)
WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
for (i = 0; i < fw_size; i++)
WREG32(CP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data++));
WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
WREG32(CP_MEC_ME1_UCODE_ADDR, le32_to_cpu(mec_hdr->header.ucode_version));
/* MEC2 */
if (rdev->family == CHIP_KAVERI) {
@ -4577,7 +4595,7 @@ static int cik_cp_compute_load_microcode(struct radeon_device *rdev)
WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
for (i = 0; i < fw_size; i++)
WREG32(CP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data++));
WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
WREG32(CP_MEC_ME2_UCODE_ADDR, le32_to_cpu(mec2_hdr->header.ucode_version));
}
} else {
const __be32 *fw_data;
@ -4677,19 +4695,18 @@ static int cik_mec_init(struct radeon_device *rdev)
/*
* KV: 2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total
* CI/KB: 1 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 32 Queues total
* Nonetheless, we assign only 1 pipe because all other pipes will
* be handled by KFD
*/
if (rdev->family == CHIP_KAVERI)
rdev->mec.num_mec = 2;
else
rdev->mec.num_mec = 1;
rdev->mec.num_pipe = 4;
rdev->mec.num_pipe = 1;
rdev->mec.num_queue = rdev->mec.num_mec * rdev->mec.num_pipe * 8;
if (rdev->mec.hpd_eop_obj == NULL) {
r = radeon_bo_create(rdev,
rdev->mec.num_mec *rdev->mec.num_pipe * MEC_HPD_SIZE * 2,
PAGE_SIZE, true,
RADEON_GEM_DOMAIN_GTT, 0, NULL,
RADEON_GEM_DOMAIN_GTT, 0, NULL, NULL,
&rdev->mec.hpd_eop_obj);
if (r) {
dev_warn(rdev->dev, "(%d) create HDP EOP bo failed\n", r);
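
The extra NULL threaded into radeon_bo_create() here (and at the RLC and MQD allocations below) is the new struct reservation_object * parameter, which lets a BO share an existing reservation; passing NULL keeps a private one. The call shape, sketched with a hypothetical size/bo pair:

r = radeon_bo_create(rdev, size, PAGE_SIZE, true,
		     RADEON_GEM_DOMAIN_GTT, 0,
		     NULL,	/* sg table: none */
		     NULL,	/* reservation_object: use the BO's own */
		     &bo);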
@ -4824,13 +4841,10 @@ static int cik_cp_compute_resume(struct radeon_device *rdev)
/* init the pipes */
mutex_lock(&rdev->srbm_mutex);
for (i = 0; i < (rdev->mec.num_pipe * rdev->mec.num_mec); i++) {
int me = (i < 4) ? 1 : 2;
int pipe = (i < 4) ? i : (i - 4);
eop_gpu_addr = rdev->mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE * 2);
eop_gpu_addr = rdev->mec.hpd_eop_gpu_addr;
cik_srbm_select(rdev, me, pipe, 0, 0);
cik_srbm_select(rdev, 0, 0, 0, 0);
/* write the EOP addr */
WREG32(CP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
@ -4844,8 +4858,7 @@ static int cik_cp_compute_resume(struct radeon_device *rdev)
tmp &= ~EOP_SIZE_MASK;
tmp |= order_base_2(MEC_HPD_SIZE / 8);
WREG32(CP_HPD_EOP_CONTROL, tmp);
}
cik_srbm_select(rdev, 0, 0, 0, 0);
mutex_unlock(&rdev->srbm_mutex);
/* init the queues. Just two for now. */
@ -4860,7 +4873,7 @@ static int cik_cp_compute_resume(struct radeon_device *rdev)
sizeof(struct bonaire_mqd),
PAGE_SIZE, true,
RADEON_GEM_DOMAIN_GTT, 0, NULL,
&rdev->ring[idx].mqd_obj);
NULL, &rdev->ring[idx].mqd_obj);
if (r) {
dev_warn(rdev->dev, "(%d) create MQD bo failed\n", r);
return r;
@ -5899,8 +5912,13 @@ int cik_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
*/
int cik_vm_init(struct radeon_device *rdev)
{
/* number of VMs */
rdev->vm_manager.nvm = 16;
/*
* number of VMs
* VMID 0 is reserved for System
* radeon graphics/compute will use VMIDs 1-7
* amdkfd will use VMIDs 8-15
*/
rdev->vm_manager.nvm = RADEON_NUM_OF_VMIDS;
/* base offset of vram pages */
if (rdev->flags & RADEON_IS_IGP) {
u64 tmp = RREG32(MC_VM_FB_OFFSET);
@ -5960,26 +5978,23 @@ static void cik_vm_decode_fault(struct radeon_device *rdev,
* Update the page table base and flush the VM TLB
* using the CP (CIK).
*/
void cik_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
void cik_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring,
unsigned vm_id, uint64_t pd_addr)
{
struct radeon_ring *ring = &rdev->ring[ridx];
int usepfp = (ridx == RADEON_RING_TYPE_GFX_INDEX);
if (vm == NULL)
return;
int usepfp = (ring->idx == RADEON_RING_TYPE_GFX_INDEX);
radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
WRITE_DATA_DST_SEL(0)));
if (vm->id < 8) {
if (vm_id < 8) {
radeon_ring_write(ring,
(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2);
(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm_id << 2)) >> 2);
} else {
radeon_ring_write(ring,
(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2);
(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm_id - 8) << 2)) >> 2);
}
radeon_ring_write(ring, 0);
radeon_ring_write(ring, vm->pd_gpu_addr >> 12);
radeon_ring_write(ring, pd_addr >> 12);
/* update SH_MEM_* regs */
radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
@ -5987,7 +6002,7 @@ void cik_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
WRITE_DATA_DST_SEL(0)));
radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
radeon_ring_write(ring, 0);
radeon_ring_write(ring, VMID(vm->id));
radeon_ring_write(ring, VMID(vm_id));
radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 6));
radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
@ -6008,7 +6023,7 @@ void cik_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
radeon_ring_write(ring, VMID(0));
/* HDP flush */
cik_hdp_flush_cp_ring_emit(rdev, ridx);
cik_hdp_flush_cp_ring_emit(rdev, ring->idx);
/* bits 0-15 are the VM contexts0-15 */
radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
@ -6016,7 +6031,7 @@ void cik_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
WRITE_DATA_DST_SEL(0)));
radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
radeon_ring_write(ring, 0);
radeon_ring_write(ring, 1 << vm->id);
radeon_ring_write(ring, 1 << vm_id);
/* compute doesn't have PFP */
if (usepfp) {
@ -6061,6 +6076,7 @@ static void cik_wait_for_rlc_serdes(struct radeon_device *rdev)
u32 i, j, k;
u32 mask;
mutex_lock(&rdev->grbm_idx_mutex);
for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
cik_select_se_sh(rdev, i, j);
@ -6072,6 +6088,7 @@ static void cik_wait_for_rlc_serdes(struct radeon_device *rdev)
}
}
cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
mutex_unlock(&rdev->grbm_idx_mutex);
mask = SE_MASTER_BUSY_MASK | GC_MASTER_BUSY | TC0_MASTER_BUSY | TC1_MASTER_BUSY;
for (k = 0; k < rdev->usec_timeout; k++) {
@ -6206,10 +6223,12 @@ static int cik_rlc_resume(struct radeon_device *rdev)
WREG32(RLC_LB_CNTR_INIT, 0);
WREG32(RLC_LB_CNTR_MAX, 0x00008000);
mutex_lock(&rdev->grbm_idx_mutex);
cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
WREG32(RLC_LB_PARAMS, 0x00600408);
WREG32(RLC_LB_CNTL, 0x80000004);
mutex_unlock(&rdev->grbm_idx_mutex);
WREG32(RLC_MC_CNTL, 0);
WREG32(RLC_UCODE_CNTL, 0);
@ -6226,7 +6245,7 @@ static int cik_rlc_resume(struct radeon_device *rdev)
WREG32(RLC_GPM_UCODE_ADDR, 0);
for (i = 0; i < size; i++)
WREG32(RLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
WREG32(RLC_GPM_UCODE_ADDR, 0);
WREG32(RLC_GPM_UCODE_ADDR, le32_to_cpu(hdr->header.ucode_version));
} else {
const __be32 *fw_data;
@ -6276,11 +6295,13 @@ static void cik_enable_cgcg(struct radeon_device *rdev, bool enable)
tmp = cik_halt_rlc(rdev);
mutex_lock(&rdev->grbm_idx_mutex);
cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
tmp2 = BPM_ADDR_MASK | CGCG_OVERRIDE_0 | CGLS_ENABLE;
WREG32(RLC_SERDES_WR_CTRL, tmp2);
mutex_unlock(&rdev->grbm_idx_mutex);
cik_update_rlc(rdev, tmp);
@ -6316,17 +6337,20 @@ static void cik_enable_mgcg(struct radeon_device *rdev, bool enable)
}
orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
data |= 0x00000001;
data &= 0xfffffffd;
if (orig != data)
WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
tmp = cik_halt_rlc(rdev);
mutex_lock(&rdev->grbm_idx_mutex);
cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
data = BPM_ADDR_MASK | MGCG_OVERRIDE_0;
WREG32(RLC_SERDES_WR_CTRL, data);
mutex_unlock(&rdev->grbm_idx_mutex);
cik_update_rlc(rdev, tmp);
@ -6347,7 +6371,7 @@ static void cik_enable_mgcg(struct radeon_device *rdev, bool enable)
}
} else {
orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
data |= 0x00000002;
data |= 0x00000003;
if (orig != data)
WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
@ -6370,11 +6394,13 @@ static void cik_enable_mgcg(struct radeon_device *rdev, bool enable)
tmp = cik_halt_rlc(rdev);
mutex_lock(&rdev->grbm_idx_mutex);
cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
data = BPM_ADDR_MASK | MGCG_OVERRIDE_1;
WREG32(RLC_SERDES_WR_CTRL, data);
mutex_unlock(&rdev->grbm_idx_mutex);
cik_update_rlc(rdev, tmp);
}
@ -6803,10 +6829,12 @@ static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
u32 mask = 0, tmp, tmp1;
int i;
mutex_lock(&rdev->grbm_idx_mutex);
cik_select_se_sh(rdev, se, sh);
tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
mutex_unlock(&rdev->grbm_idx_mutex);
tmp &= 0xffff0000;
@ -7290,8 +7318,7 @@ static int cik_irq_init(struct radeon_device *rdev)
int cik_irq_set(struct radeon_device *rdev)
{
u32 cp_int_cntl;
u32 cp_m1p0, cp_m1p1, cp_m1p2, cp_m1p3;
u32 cp_m2p0, cp_m2p1, cp_m2p2, cp_m2p3;
u32 cp_m1p0;
u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6;
u32 grbm_int_cntl = 0;
@ -7325,13 +7352,6 @@ int cik_irq_set(struct radeon_device *rdev)
dma_cntl1 = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
cp_m1p0 = RREG32(CP_ME1_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
cp_m1p1 = RREG32(CP_ME1_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
cp_m1p2 = RREG32(CP_ME1_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
cp_m1p3 = RREG32(CP_ME1_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
cp_m2p0 = RREG32(CP_ME2_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
cp_m2p1 = RREG32(CP_ME2_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
cp_m2p2 = RREG32(CP_ME2_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
cp_m2p3 = RREG32(CP_ME2_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
if (rdev->flags & RADEON_IS_IGP)
thermal_int = RREG32_SMC(CG_THERMAL_INT_CTRL) &
@ -7353,33 +7373,6 @@ int cik_irq_set(struct radeon_device *rdev)
case 0:
cp_m1p0 |= TIME_STAMP_INT_ENABLE;
break;
case 1:
cp_m1p1 |= TIME_STAMP_INT_ENABLE;
break;
case 2:
cp_m1p2 |= TIME_STAMP_INT_ENABLE;
break;
case 3:
cp_m1p2 |= TIME_STAMP_INT_ENABLE;
break;
default:
DRM_DEBUG("si_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
break;
}
} else if (ring->me == 2) {
switch (ring->pipe) {
case 0:
cp_m2p0 |= TIME_STAMP_INT_ENABLE;
break;
case 1:
cp_m2p1 |= TIME_STAMP_INT_ENABLE;
break;
case 2:
cp_m2p2 |= TIME_STAMP_INT_ENABLE;
break;
case 3:
cp_m2p2 |= TIME_STAMP_INT_ENABLE;
break;
default:
DRM_DEBUG("si_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
break;
@ -7396,33 +7389,6 @@ int cik_irq_set(struct radeon_device *rdev)
case 0:
cp_m1p0 |= TIME_STAMP_INT_ENABLE;
break;
case 1:
cp_m1p1 |= TIME_STAMP_INT_ENABLE;
break;
case 2:
cp_m1p2 |= TIME_STAMP_INT_ENABLE;
break;
case 3:
cp_m1p2 |= TIME_STAMP_INT_ENABLE;
break;
default:
DRM_DEBUG("si_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
break;
}
} else if (ring->me == 2) {
switch (ring->pipe) {
case 0:
cp_m2p0 |= TIME_STAMP_INT_ENABLE;
break;
case 1:
cp_m2p1 |= TIME_STAMP_INT_ENABLE;
break;
case 2:
cp_m2p2 |= TIME_STAMP_INT_ENABLE;
break;
case 3:
cp_m2p2 |= TIME_STAMP_INT_ENABLE;
break;
default:
DRM_DEBUG("si_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
break;
@ -7511,13 +7477,6 @@ int cik_irq_set(struct radeon_device *rdev)
WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, dma_cntl1);
WREG32(CP_ME1_PIPE0_INT_CNTL, cp_m1p0);
WREG32(CP_ME1_PIPE1_INT_CNTL, cp_m1p1);
WREG32(CP_ME1_PIPE2_INT_CNTL, cp_m1p2);
WREG32(CP_ME1_PIPE3_INT_CNTL, cp_m1p3);
WREG32(CP_ME2_PIPE0_INT_CNTL, cp_m2p0);
WREG32(CP_ME2_PIPE1_INT_CNTL, cp_m2p1);
WREG32(CP_ME2_PIPE2_INT_CNTL, cp_m2p2);
WREG32(CP_ME2_PIPE3_INT_CNTL, cp_m2p3);
WREG32(GRBM_INT_CNTL, grbm_int_cntl);
@ -7834,6 +7793,10 @@ restart_ih:
while (rptr != wptr) {
/* wptr/rptr are in bytes! */
ring_index = rptr / 4;
// radeon_kfd_interrupt(rdev,
// (const void *) &rdev->ih.ring[ring_index]);
src_id = le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
@ -8457,6 +8420,10 @@ static int cik_startup(struct radeon_device *rdev)
return r;
}
// r = radeon_kfd_resume(rdev);
// if (r)
// return r;
return 0;
}
@ -9280,6 +9247,9 @@ void dce8_bandwidth_update(struct radeon_device *rdev)
u32 num_heads = 0, lb_size;
int i;
if (!rdev->mode_info.mode_config_initialized)
return;
radeon_update_display_priority(rdev);
for (i = 0; i < rdev->num_crtc; i++) {


@ -147,4 +147,140 @@
#define CIK_LB_DESKTOP_HEIGHT 0x6b0c
#define CP_HQD_IQ_RPTR 0xC970u
#define AQL_ENABLE (1U << 0)
#define IDLE (1 << 2)
struct cik_mqd {
uint32_t header;
uint32_t compute_dispatch_initiator;
uint32_t compute_dim_x;
uint32_t compute_dim_y;
uint32_t compute_dim_z;
uint32_t compute_start_x;
uint32_t compute_start_y;
uint32_t compute_start_z;
uint32_t compute_num_thread_x;
uint32_t compute_num_thread_y;
uint32_t compute_num_thread_z;
uint32_t compute_pipelinestat_enable;
uint32_t compute_perfcount_enable;
uint32_t compute_pgm_lo;
uint32_t compute_pgm_hi;
uint32_t compute_tba_lo;
uint32_t compute_tba_hi;
uint32_t compute_tma_lo;
uint32_t compute_tma_hi;
uint32_t compute_pgm_rsrc1;
uint32_t compute_pgm_rsrc2;
uint32_t compute_vmid;
uint32_t compute_resource_limits;
uint32_t compute_static_thread_mgmt_se0;
uint32_t compute_static_thread_mgmt_se1;
uint32_t compute_tmpring_size;
uint32_t compute_static_thread_mgmt_se2;
uint32_t compute_static_thread_mgmt_se3;
uint32_t compute_restart_x;
uint32_t compute_restart_y;
uint32_t compute_restart_z;
uint32_t compute_thread_trace_enable;
uint32_t compute_misc_reserved;
uint32_t compute_user_data_0;
uint32_t compute_user_data_1;
uint32_t compute_user_data_2;
uint32_t compute_user_data_3;
uint32_t compute_user_data_4;
uint32_t compute_user_data_5;
uint32_t compute_user_data_6;
uint32_t compute_user_data_7;
uint32_t compute_user_data_8;
uint32_t compute_user_data_9;
uint32_t compute_user_data_10;
uint32_t compute_user_data_11;
uint32_t compute_user_data_12;
uint32_t compute_user_data_13;
uint32_t compute_user_data_14;
uint32_t compute_user_data_15;
uint32_t cp_compute_csinvoc_count_lo;
uint32_t cp_compute_csinvoc_count_hi;
uint32_t cp_mqd_base_addr_lo;
uint32_t cp_mqd_base_addr_hi;
uint32_t cp_hqd_active;
uint32_t cp_hqd_vmid;
uint32_t cp_hqd_persistent_state;
uint32_t cp_hqd_pipe_priority;
uint32_t cp_hqd_queue_priority;
uint32_t cp_hqd_quantum;
uint32_t cp_hqd_pq_base_lo;
uint32_t cp_hqd_pq_base_hi;
uint32_t cp_hqd_pq_rptr;
uint32_t cp_hqd_pq_rptr_report_addr_lo;
uint32_t cp_hqd_pq_rptr_report_addr_hi;
uint32_t cp_hqd_pq_wptr_poll_addr_lo;
uint32_t cp_hqd_pq_wptr_poll_addr_hi;
uint32_t cp_hqd_pq_doorbell_control;
uint32_t cp_hqd_pq_wptr;
uint32_t cp_hqd_pq_control;
uint32_t cp_hqd_ib_base_addr_lo;
uint32_t cp_hqd_ib_base_addr_hi;
uint32_t cp_hqd_ib_rptr;
uint32_t cp_hqd_ib_control;
uint32_t cp_hqd_iq_timer;
uint32_t cp_hqd_iq_rptr;
uint32_t cp_hqd_dequeue_request;
uint32_t cp_hqd_dma_offload;
uint32_t cp_hqd_sema_cmd;
uint32_t cp_hqd_msg_type;
uint32_t cp_hqd_atomic0_preop_lo;
uint32_t cp_hqd_atomic0_preop_hi;
uint32_t cp_hqd_atomic1_preop_lo;
uint32_t cp_hqd_atomic1_preop_hi;
uint32_t cp_hqd_hq_status0;
uint32_t cp_hqd_hq_control0;
uint32_t cp_mqd_control;
uint32_t cp_mqd_query_time_lo;
uint32_t cp_mqd_query_time_hi;
uint32_t cp_mqd_connect_start_time_lo;
uint32_t cp_mqd_connect_start_time_hi;
uint32_t cp_mqd_connect_end_time_lo;
uint32_t cp_mqd_connect_end_time_hi;
uint32_t cp_mqd_connect_end_wf_count;
uint32_t cp_mqd_connect_end_pq_rptr;
uint32_t cp_mqd_connect_end_pq_wptr;
uint32_t cp_mqd_connect_end_ib_rptr;
uint32_t reserved_96;
uint32_t reserved_97;
uint32_t reserved_98;
uint32_t reserved_99;
uint32_t iqtimer_pkt_header;
uint32_t iqtimer_pkt_dw0;
uint32_t iqtimer_pkt_dw1;
uint32_t iqtimer_pkt_dw2;
uint32_t iqtimer_pkt_dw3;
uint32_t iqtimer_pkt_dw4;
uint32_t iqtimer_pkt_dw5;
uint32_t iqtimer_pkt_dw6;
uint32_t reserved_108;
uint32_t reserved_109;
uint32_t reserved_110;
uint32_t reserved_111;
uint32_t queue_doorbell_id0;
uint32_t queue_doorbell_id1;
uint32_t queue_doorbell_id2;
uint32_t queue_doorbell_id3;
uint32_t queue_doorbell_id4;
uint32_t queue_doorbell_id5;
uint32_t queue_doorbell_id6;
uint32_t queue_doorbell_id7;
uint32_t queue_doorbell_id8;
uint32_t queue_doorbell_id9;
uint32_t queue_doorbell_id10;
uint32_t queue_doorbell_id11;
uint32_t queue_doorbell_id12;
uint32_t queue_doorbell_id13;
uint32_t queue_doorbell_id14;
uint32_t queue_doorbell_id15;
};
#endif
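
struct cik_mqd mirrors the CP's Memory Queue Descriptor layout field for field, so members must not be reordered or resized; software zeroes the whole structure in a GPU-visible buffer and then fills in only what it needs before pointing CP_MQD_BASE_ADDR at it. Illustrative setup (buf and mqd_gpu_addr are hypothetical CPU/GPU addresses of the backing buffer; lower/upper_32_bits are the usual kernel helpers):

struct cik_mqd *m = buf;	/* CPU mapping of the MQD buffer */

memset(m, 0, sizeof(*m));
m->cp_mqd_base_addr_lo = lower_32_bits(mqd_gpu_addr);
m->cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
m->cp_hqd_vmid = 0;		/* kernel queue: VMID 0 */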


@ -134,7 +134,7 @@ void cik_sdma_ring_ib_execute(struct radeon_device *rdev,
struct radeon_ib *ib)
{
struct radeon_ring *ring = &rdev->ring[ib->ring];
u32 extra_bits = (ib->vm ? ib->vm->id : 0) & 0xf;
u32 extra_bits = (ib->vm ? ib->vm->ids[ib->ring].id : 0) & 0xf;
if (rdev->wb.enabled) {
u32 next_rptr = ring->wptr + 5;
@ -530,41 +530,38 @@ void cik_sdma_fini(struct radeon_device *rdev)
* @src_offset: src GPU address
* @dst_offset: dst GPU address
* @num_gpu_pages: number of GPU pages to xfer
* @fence: radeon fence object
* @resv: reservation object to sync to
*
* Copy GPU paging using the DMA engine (CIK).
* Used by the radeon ttm implementation to move pages if
* registered as the asic copy callback.
*/
int cik_copy_dma(struct radeon_device *rdev,
struct radeon_fence *cik_copy_dma(struct radeon_device *rdev,
uint64_t src_offset, uint64_t dst_offset,
unsigned num_gpu_pages,
struct radeon_fence **fence)
struct reservation_object *resv)
{
struct radeon_semaphore *sem = NULL;
struct radeon_fence *fence;
struct radeon_sync sync;
int ring_index = rdev->asic->copy.dma_ring_index;
struct radeon_ring *ring = &rdev->ring[ring_index];
u32 size_in_bytes, cur_size_in_bytes;
int i, num_loops;
int r = 0;
r = radeon_semaphore_create(rdev, &sem);
if (r) {
DRM_ERROR("radeon: moving bo (%d).\n", r);
return r;
}
radeon_sync_create(&sync);
size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
num_loops = DIV_ROUND_UP(size_in_bytes, 0x1fffff);
r = radeon_ring_lock(rdev, ring, num_loops * 7 + 14);
if (r) {
DRM_ERROR("radeon: moving bo (%d).\n", r);
radeon_semaphore_free(rdev, &sem, NULL);
return r;
radeon_sync_free(rdev, &sync, NULL);
return ERR_PTR(r);
}
radeon_semaphore_sync_to(sem, *fence);
radeon_semaphore_sync_rings(rdev, sem, ring->idx);
radeon_sync_resv(rdev, &sync, resv, false);
radeon_sync_rings(rdev, &sync, ring->idx);
for (i = 0; i < num_loops; i++) {
cur_size_in_bytes = size_in_bytes;
@ -582,17 +579,17 @@ int cik_copy_dma(struct radeon_device *rdev,
dst_offset += cur_size_in_bytes;
}
r = radeon_fence_emit(rdev, fence, ring->idx);
r = radeon_fence_emit(rdev, &fence, ring->idx);
if (r) {
radeon_ring_unlock_undo(rdev, ring);
radeon_semaphore_free(rdev, &sem, NULL);
return r;
radeon_sync_free(rdev, &sync, NULL);
return ERR_PTR(r);
}
radeon_ring_unlock_commit(rdev, ring, false);
radeon_semaphore_free(rdev, &sem, *fence);
radeon_sync_free(rdev, &sync, fence);
return r;
return fence;
}
/**
@ -666,17 +663,20 @@ int cik_sdma_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
{
struct radeon_ib ib;
unsigned i;
unsigned index;
int r;
void __iomem *ptr = (void *)rdev->vram_scratch.ptr;
u32 tmp = 0;
u64 gpu_addr;
if (!ptr) {
DRM_ERROR("invalid vram scratch pointer\n");
return -EINVAL;
}
if (ring->idx == R600_RING_TYPE_DMA_INDEX)
index = R600_WB_DMA_RING_TEST_OFFSET;
else
index = CAYMAN_WB_DMA1_RING_TEST_OFFSET;
gpu_addr = rdev->wb.gpu_addr + index;
tmp = 0xCAFEDEAD;
writel(tmp, ptr);
rdev->wb.wb[index/4] = cpu_to_le32(tmp);
r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
if (r) {
@ -685,8 +685,8 @@ int cik_sdma_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
}
ib.ptr[0] = SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
ib.ptr[1] = rdev->vram_scratch.gpu_addr & 0xfffffffc;
ib.ptr[2] = upper_32_bits(rdev->vram_scratch.gpu_addr);
ib.ptr[1] = lower_32_bits(gpu_addr);
ib.ptr[2] = upper_32_bits(gpu_addr);
ib.ptr[3] = 1;
ib.ptr[4] = 0xDEADBEEF;
ib.length_dw = 5;
@ -703,7 +703,7 @@ int cik_sdma_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
return r;
}
for (i = 0; i < rdev->usec_timeout; i++) {
tmp = readl(ptr);
tmp = le32_to_cpu(rdev->wb.wb[index/4]);
if (tmp == 0xDEADBEEF)
break;
DRM_UDELAY(1);
@ -900,25 +900,21 @@ void cik_sdma_vm_pad_ib(struct radeon_ib *ib)
* Update the page table base and flush the VM TLB
* using sDMA (CIK).
*/
void cik_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
void cik_dma_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring,
unsigned vm_id, uint64_t pd_addr)
{
struct radeon_ring *ring = &rdev->ring[ridx];
if (vm == NULL)
return;
radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
if (vm->id < 8) {
radeon_ring_write(ring, (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2);
if (vm_id < 8) {
radeon_ring_write(ring, (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm_id << 2)) >> 2);
} else {
radeon_ring_write(ring, (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2);
radeon_ring_write(ring, (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm_id - 8) << 2)) >> 2);
}
radeon_ring_write(ring, vm->pd_gpu_addr >> 12);
radeon_ring_write(ring, pd_addr >> 12);
/* update SH_MEM_* regs */
radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
radeon_ring_write(ring, VMID(vm->id));
radeon_ring_write(ring, VMID(vm_id));
radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
radeon_ring_write(ring, SH_MEM_BASES >> 2);
@ -941,11 +937,11 @@ void cik_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm
radeon_ring_write(ring, VMID(0));
/* flush HDP */
cik_sdma_hdp_flush_ring_emit(rdev, ridx);
cik_sdma_hdp_flush_ring_emit(rdev, ring->idx);
/* flush TLB */
radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
radeon_ring_write(ring, 1 << vm->id);
radeon_ring_write(ring, 1 << vm_id);
}
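
Like the CP variant earlier, cik_dma_vm_flush() now takes the resolved (vm_id, pd_addr) pair instead of a struct radeon_vm *, matching the per-ring VMIDs introduced by vm->ids[] (see the IB-execute hunks above) and the 1-7 graphics / 8-15 amdkfd split noted in cik_vm_init(). Caller side, sketched:

unsigned vm_id = vm->ids[ring->idx].id;	/* per-ring VMID */

if (vm_id)	/* sketch: flush only when a VMID is bound */
	cik_dma_vm_flush(rdev, ring, vm_id, pd_addr);	/* pd_addr: page-directory GPU address */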


@ -30,6 +30,8 @@
#define CIK_RB_BITMAP_WIDTH_PER_SH 2
#define HAWAII_RB_BITMAP_WIDTH_PER_SH 4
#define RADEON_NUM_OF_VMIDS 8
/* DIDT IND registers */
#define DIDT_SQ_CTRL0 0x0
# define DIDT_CTRL_EN (1 << 0)
@ -184,7 +186,10 @@
#define DIG_THERM_DPM(x) ((x) << 14)
#define DIG_THERM_DPM_MASK 0x003FC000
#define DIG_THERM_DPM_SHIFT 14
#define CG_THERMAL_STATUS 0xC0300008
#define FDO_PWM_DUTY(x) ((x) << 9)
#define FDO_PWM_DUTY_MASK (0xff << 9)
#define FDO_PWM_DUTY_SHIFT 9
#define CG_THERMAL_INT 0xC030000C
#define CI_DIG_THERM_INTH(x) ((x) << 8)
#define CI_DIG_THERM_INTH_MASK 0x0000FF00
@ -194,7 +199,10 @@
#define CI_DIG_THERM_INTL_SHIFT 16
#define THERM_INT_MASK_HIGH (1 << 24)
#define THERM_INT_MASK_LOW (1 << 25)
#define CG_MULT_THERMAL_CTRL 0xC0300010
#define TEMP_SEL(x) ((x) << 20)
#define TEMP_SEL_MASK (0xff << 20)
#define TEMP_SEL_SHIFT 20
#define CG_MULT_THERMAL_STATUS 0xC0300014
#define ASIC_MAX_TEMP(x) ((x) << 0)
#define ASIC_MAX_TEMP_MASK 0x000001ff
@ -203,6 +211,36 @@
#define CTF_TEMP_MASK 0x0003fe00
#define CTF_TEMP_SHIFT 9
#define CG_FDO_CTRL0 0xC0300064
#define FDO_STATIC_DUTY(x) ((x) << 0)
#define FDO_STATIC_DUTY_MASK 0x000000FF
#define FDO_STATIC_DUTY_SHIFT 0
#define CG_FDO_CTRL1 0xC0300068
#define FMAX_DUTY100(x) ((x) << 0)
#define FMAX_DUTY100_MASK 0x000000FF
#define FMAX_DUTY100_SHIFT 0
#define CG_FDO_CTRL2 0xC030006C
#define TMIN(x) ((x) << 0)
#define TMIN_MASK 0x000000FF
#define TMIN_SHIFT 0
#define FDO_PWM_MODE(x) ((x) << 11)
#define FDO_PWM_MODE_MASK (7 << 11)
#define FDO_PWM_MODE_SHIFT 11
#define TACH_PWM_RESP_RATE(x) ((x) << 25)
#define TACH_PWM_RESP_RATE_MASK (0x7f << 25)
#define TACH_PWM_RESP_RATE_SHIFT 25
#define CG_TACH_CTRL 0xC0300070
# define EDGE_PER_REV(x) ((x) << 0)
# define EDGE_PER_REV_MASK (0x7 << 0)
# define EDGE_PER_REV_SHIFT 0
# define TARGET_PERIOD(x) ((x) << 3)
# define TARGET_PERIOD_MASK 0xfffffff8
# define TARGET_PERIOD_SHIFT 3
#define CG_TACH_STATUS 0xC0300074
# define TACH_PERIOD(x) ((x) << 0)
# define TACH_PERIOD_MASK 0xffffffff
# define TACH_PERIOD_SHIFT 0
#define CG_ECLK_CNTL 0xC05000AC
# define ECLK_DIVIDER_MASK 0x7f
# define ECLK_DIR_CNTL_EN (1 << 8)
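
The new fan-control definitions above (CG_FDO_CTRL*, CG_TACH_*) follow the usual value/mask/shift triple, consumed by read-modify-write sequences on the SMC register space. For example, forcing a static fan duty cycle might look like this sketch (duty is a hypothetical 0-255 PWM target; RREG32_SMC/WREG32_SMC are the existing SMC accessors):

u32 tmp = RREG32_SMC(CG_FDO_CTRL0);

tmp &= ~FDO_STATIC_DUTY_MASK;
tmp |= FDO_STATIC_DUTY(duty);
WREG32_SMC(CG_FDO_CTRL0, tmp);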
@ -1137,6 +1175,9 @@
#define SH_MEM_ALIGNMENT_MODE_UNALIGNED 3
#define DEFAULT_MTYPE(x) ((x) << 4)
#define APE1_MTYPE(x) ((x) << 7)
/* valid for both DEFAULT_MTYPE and APE1_MTYPE */
#define MTYPE_CACHED 0
#define MTYPE_NONCACHED 3
#define SX_DEBUG_1 0x9060
@ -1447,6 +1488,16 @@
#define CP_HQD_ACTIVE 0xC91C
#define CP_HQD_VMID 0xC920
#define CP_HQD_PERSISTENT_STATE 0xC924u
#define DEFAULT_CP_HQD_PERSISTENT_STATE (0x33U << 8)
#define CP_HQD_PIPE_PRIORITY 0xC928u
#define CP_HQD_QUEUE_PRIORITY 0xC92Cu
#define CP_HQD_QUANTUM 0xC930u
#define QUANTUM_EN 1U
#define QUANTUM_SCALE_1MS (1U << 4)
#define QUANTUM_DURATION(x) ((x) << 8)
#define CP_HQD_PQ_BASE 0xC934
#define CP_HQD_PQ_BASE_HI 0xC938
#define CP_HQD_PQ_RPTR 0xC93C
@ -1474,12 +1525,32 @@
#define PRIV_STATE (1 << 30)
#define KMD_QUEUE (1 << 31)
#define CP_HQD_IB_BASE_ADDR 0xC95Cu
#define CP_HQD_IB_BASE_ADDR_HI 0xC960u
#define CP_HQD_IB_RPTR 0xC964u
#define CP_HQD_IB_CONTROL 0xC968u
#define IB_ATC_EN (1U << 23)
#define DEFAULT_MIN_IB_AVAIL_SIZE (3U << 20)
#define CP_HQD_DEQUEUE_REQUEST 0xC974
#define DEQUEUE_REQUEST_DRAIN 1
#define DEQUEUE_REQUEST_RESET 2
#define CP_MQD_CONTROL 0xC99C
#define MQD_VMID(x) ((x) << 0)
#define MQD_VMID_MASK (0xf << 0)
#define CP_HQD_SEMA_CMD 0xC97Cu
#define CP_HQD_MSG_TYPE 0xC980u
#define CP_HQD_ATOMIC0_PREOP_LO 0xC984u
#define CP_HQD_ATOMIC0_PREOP_HI 0xC988u
#define CP_HQD_ATOMIC1_PREOP_LO 0xC98Cu
#define CP_HQD_ATOMIC1_PREOP_HI 0xC990u
#define CP_HQD_HQ_SCHEDULER0 0xC994u
#define CP_HQD_HQ_SCHEDULER1 0xC998u
#define SH_STATIC_MEM_CONFIG 0x9604u
#define DB_RENDER_CONTROL 0x28000
#define PA_SC_RASTER_CONFIG 0x28350
@ -2069,4 +2140,20 @@
#define VCE_CMD_IB_AUTO 0x00000005
#define VCE_CMD_SEMAPHORE 0x00000006
#define ATC_VMID0_PASID_MAPPING 0x339Cu
#define ATC_VMID_PASID_MAPPING_UPDATE_STATUS 0x3398u
#define ATC_VMID_PASID_MAPPING_VALID (1U << 31)
#define ATC_VM_APERTURE0_CNTL 0x3310u
#define ATS_ACCESS_MODE_NEVER 0
#define ATS_ACCESS_MODE_ALWAYS 1
#define ATC_VM_APERTURE0_CNTL2 0x3318u
#define ATC_VM_APERTURE0_HIGH_ADDR 0x3308u
#define ATC_VM_APERTURE0_LOW_ADDR 0x3300u
#define ATC_VM_APERTURE1_CNTL 0x3314u
#define ATC_VM_APERTURE1_CNTL2 0x331Cu
#define ATC_VM_APERTURE1_HIGH_ADDR 0x330Cu
#define ATC_VM_APERTURE1_LOW_ADDR 0x3304u
#endif


@ -1,7 +1,5 @@
#include <drm/drmP.h>
#include <drm.h>
#include <drm_mm.h>
#include <drm/radeon_drm.h>
#include "radeon.h"
#include "radeon_object.h"


@ -24,6 +24,7 @@
#include "drmP.h"
#include "radeon.h"
#include "radeon_asic.h"
#include "evergreend.h"
#include "r600_dpm.h"
#include "cypress_dpm.h"


@ -32,7 +32,7 @@ static void dce3_2_afmt_write_speaker_allocation(struct drm_encoder *encoder)
struct drm_connector *connector;
struct radeon_connector *radeon_connector = NULL;
u32 tmp;
u8 *sadb;
u8 *sadb = NULL;
int sad_count;
list_for_each_entry(connector, &encoder->dev->mode_config.connector_list, head) {
@ -165,7 +165,7 @@ void dce3_1_hdmi_setmode(struct drm_encoder *encoder, struct drm_display_mode *m
/* disable audio prior to setting up hw */
dig->afmt->pin = r600_audio_get_pin(rdev);
r600_audio_enable(rdev, dig->afmt->pin, false);
r600_audio_enable(rdev, dig->afmt->pin, 0);
r600_audio_set_dto(encoder, mode->clock);
@ -240,5 +240,5 @@ void dce3_1_hdmi_setmode(struct drm_encoder *encoder, struct drm_display_mode *m
r600_hdmi_audio_workaround(encoder);
/* enable audio after to setting up hw */
r600_audio_enable(rdev, dig->afmt->pin, true);
r600_audio_enable(rdev, dig->afmt->pin, 0xf);
}


@ -155,7 +155,7 @@ void dce6_afmt_write_speaker_allocation(struct drm_encoder *encoder)
struct drm_connector *connector;
struct radeon_connector *radeon_connector = NULL;
u32 offset, tmp;
u8 *sadb;
u8 *sadb = NULL;
int sad_count;
if (!dig || !dig->afmt || !dig->afmt->pin)
@ -284,13 +284,13 @@ static int dce6_audio_chipset_supported(struct radeon_device *rdev)
void dce6_audio_enable(struct radeon_device *rdev,
struct r600_audio_pin *pin,
bool enable)
u8 enable_mask)
{
if (!pin)
return;
WREG32_ENDPOINT(pin->offset, AZ_F0_CODEC_PIN_CONTROL_HOTPLUG_CONTROL,
enable ? AUDIO_ENABLED : 0);
WREG32_ENDPOINT(pin->offset, AZ_F0_CODEC_PIN_CONTROL_HOT_PLUG_CONTROL,
enable_mask ? AUDIO_ENABLED : 0);
}
static const u32 pin_offsets[7] =


@ -22,7 +22,6 @@
* Authors: Alex Deucher
*/
#include <linux/firmware.h>
//#include <linux/platform_device.h>
#include <linux/slab.h>
#include <drm/drmP.h>
#include "radeon.h"
@ -2346,6 +2345,9 @@ void evergreen_bandwidth_update(struct radeon_device *rdev)
u32 num_heads = 0, lb_size;
int i;
if (!rdev->mode_info.mode_config_initialized)
return;
radeon_update_display_priority(rdev);
for (i = 0; i < rdev->num_crtc; i++) {
@ -2553,6 +2555,7 @@ void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *sav
WREG32(EVERGREEN_CRTC_UPDATE_LOCK + crtc_offsets[i], 1);
tmp |= EVERGREEN_CRTC_BLANK_DATA_EN;
WREG32(EVERGREEN_CRTC_BLANK_CONTROL + crtc_offsets[i], tmp);
WREG32(EVERGREEN_CRTC_UPDATE_LOCK + crtc_offsets[i], 0);
}
} else {
tmp = RREG32(EVERGREEN_CRTC_CONTROL + crtc_offsets[i]);
@ -3006,7 +3009,7 @@ static void evergreen_gpu_init(struct radeon_device *rdev)
u32 vgt_cache_invalidation;
u32 hdp_host_path_cntl, tmp;
u32 disabled_rb_mask;
int i, j, num_shader_engines, ps_thread_count;
int i, j, ps_thread_count;
switch (rdev->family) {
case CHIP_CYPRESS:
@ -3304,8 +3307,6 @@ static void evergreen_gpu_init(struct radeon_device *rdev)
rdev->config.evergreen.tile_config |=
((gb_addr_config & 0x30000000) >> 28) << 12;
num_shader_engines = (gb_addr_config & NUM_SHADER_ENGINES(3) >> 12) + 1;
if ((rdev->family >= CHIP_CEDAR) && (rdev->family <= CHIP_HEMLOCK)) {
u32 efuse_straps_4;
u32 efuse_straps_3;
@ -4023,7 +4024,7 @@ int sumo_rlc_init(struct radeon_device *rdev)
if (rdev->rlc.save_restore_obj == NULL) {
r = radeon_bo_create(rdev, dws * 4, PAGE_SIZE, true,
RADEON_GEM_DOMAIN_VRAM, 0, NULL,
&rdev->rlc.save_restore_obj);
NULL, &rdev->rlc.save_restore_obj);
if (r) {
dev_warn(rdev->dev, "(%d) create RLC sr bo failed\n", r);
return r;
@ -4102,7 +4103,7 @@ int sumo_rlc_init(struct radeon_device *rdev)
if (rdev->rlc.clear_state_obj == NULL) {
r = radeon_bo_create(rdev, dws * 4, PAGE_SIZE, true,
RADEON_GEM_DOMAIN_VRAM, 0, NULL,
&rdev->rlc.clear_state_obj);
NULL, &rdev->rlc.clear_state_obj);
if (r) {
dev_warn(rdev->dev, "(%d) create RLC c bo failed\n", r);
sumo_rlc_fini(rdev);
@ -4179,7 +4180,7 @@ int sumo_rlc_init(struct radeon_device *rdev)
r = radeon_bo_create(rdev, rdev->rlc.cp_table_size,
PAGE_SIZE, true,
RADEON_GEM_DOMAIN_VRAM, 0, NULL,
&rdev->rlc.cp_table_obj);
NULL, &rdev->rlc.cp_table_obj);
if (r) {
dev_warn(rdev->dev, "(%d) create RLC cp table bo failed\n", r);
sumo_rlc_fini(rdev);
@ -5135,9 +5136,9 @@ restart_ih:
/* wptr/rptr are in bytes! */
rptr += 16;
rptr &= rdev->ih.ptr_mask;
WREG32(IH_RB_RPTR, rptr);
}
rdev->ih.rptr = rptr;
WREG32(IH_RB_RPTR, rdev->ih.rptr);
atomic_set(&rdev->ih.lock, 0);
/* make sure wptr hasn't changed while processing */


@ -35,7 +35,7 @@
#define MIN(a,b) (((a)<(b))?(a):(b))
int r600_dma_cs_next_reloc(struct radeon_cs_parser *p,
struct radeon_cs_reloc **cs_reloc);
struct radeon_bo_list **cs_reloc);
struct evergreen_cs_track {
u32 group_size;
u32 nbanks;
@ -1094,7 +1094,7 @@ static int evergreen_cs_parse_packet0(struct radeon_cs_parser *p,
static int evergreen_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx)
{
struct evergreen_cs_track *track = (struct evergreen_cs_track *)p->track;
struct radeon_cs_reloc *reloc;
struct radeon_bo_list *reloc;
u32 last_reg;
u32 m, i, tmp, *ib;
int r;
@ -1792,7 +1792,7 @@ static bool evergreen_is_safe_reg(struct radeon_cs_parser *p, u32 reg, u32 idx)
static int evergreen_packet3_check(struct radeon_cs_parser *p,
struct radeon_cs_packet *pkt)
{
struct radeon_cs_reloc *reloc;
struct radeon_bo_list *reloc;
struct evergreen_cs_track *track;
volatile u32 *ib;
unsigned idx;
@ -2661,7 +2661,7 @@ int evergreen_cs_parse(struct radeon_cs_parser *p)
p->track = NULL;
return r;
}
} while (p->idx < p->chunks[p->chunk_ib_idx].length_dw);
} while (p->idx < p->chunk_ib->length_dw);
#if 0
for (r = 0; r < p->ib.length_dw; r++) {
printk(KERN_INFO "%05d 0x%08X\n", r, p->ib.ptr[r]);
@ -2684,8 +2684,8 @@ int evergreen_cs_parse(struct radeon_cs_parser *p)
**/
int evergreen_dma_cs_parse(struct radeon_cs_parser *p)
{
struct radeon_cs_chunk *ib_chunk = &p->chunks[p->chunk_ib_idx];
struct radeon_cs_reloc *src_reloc, *dst_reloc, *dst2_reloc;
struct radeon_cs_chunk *ib_chunk = p->chunk_ib;
struct radeon_bo_list *src_reloc, *dst_reloc, *dst2_reloc;
u32 header, cmd, count, sub_cmd;
volatile u32 *ib = p->ib.ptr;
u32 idx;
@ -3100,7 +3100,7 @@ int evergreen_dma_cs_parse(struct radeon_cs_parser *p)
DRM_ERROR("Unknown packet type %d at %d !\n", cmd, idx);
return -EINVAL;
}
} while (p->idx < p->chunks[p->chunk_ib_idx].length_dw);
} while (p->idx < p->chunk_ib->length_dw);
#if 0
for (r = 0; r < p->ib->length_dw; r++) {
printk(KERN_INFO "%05d 0x%08X\n", r, p->ib.ptr[r]);


@ -104,35 +104,33 @@ void evergreen_dma_ring_ib_execute(struct radeon_device *rdev,
* Used by the radeon ttm implementation to move pages if
* registered as the asic copy callback.
*/
int evergreen_copy_dma(struct radeon_device *rdev,
uint64_t src_offset, uint64_t dst_offset,
struct radeon_fence *evergreen_copy_dma(struct radeon_device *rdev,
uint64_t src_offset,
uint64_t dst_offset,
unsigned num_gpu_pages,
struct radeon_fence **fence)
struct reservation_object *resv)
{
struct radeon_semaphore *sem = NULL;
struct radeon_fence *fence;
struct radeon_sync sync;
int ring_index = rdev->asic->copy.dma_ring_index;
struct radeon_ring *ring = &rdev->ring[ring_index];
u32 size_in_dw, cur_size_in_dw;
int i, num_loops;
int r = 0;
r = radeon_semaphore_create(rdev, &sem);
if (r) {
DRM_ERROR("radeon: moving bo (%d).\n", r);
return r;
}
radeon_sync_create(&sync);
size_in_dw = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT) / 4;
num_loops = DIV_ROUND_UP(size_in_dw, 0xfffff);
r = radeon_ring_lock(rdev, ring, num_loops * 5 + 11);
if (r) {
DRM_ERROR("radeon: moving bo (%d).\n", r);
radeon_semaphore_free(rdev, &sem, NULL);
return r;
radeon_sync_free(rdev, &sync, NULL);
return ERR_PTR(r);
}
radeon_semaphore_sync_to(sem, *fence);
radeon_semaphore_sync_rings(rdev, sem, ring->idx);
radeon_sync_resv(rdev, &sync, resv, false);
radeon_sync_rings(rdev, &sync, ring->idx);
for (i = 0; i < num_loops; i++) {
cur_size_in_dw = size_in_dw;
@ -148,17 +146,17 @@ int evergreen_copy_dma(struct radeon_device *rdev,
dst_offset += cur_size_in_dw * 4;
}
r = radeon_fence_emit(rdev, fence, ring->idx);
r = radeon_fence_emit(rdev, &fence, ring->idx);
if (r) {
radeon_ring_unlock_undo(rdev, ring);
radeon_semaphore_free(rdev, &sem, NULL);
return r;
radeon_sync_free(rdev, &sync, NULL);
return ERR_PTR(r);
}
radeon_ring_unlock_commit(rdev, ring, false);
radeon_semaphore_free(rdev, &sem, *fence);
radeon_sync_free(rdev, &sync, fence);
return r;
return fence;
}
/**


@ -38,6 +38,37 @@ extern void dce6_afmt_select_pin(struct drm_encoder *encoder);
extern void dce6_afmt_write_latency_fields(struct drm_encoder *encoder,
struct drm_display_mode *mode);
/* enable the audio stream */
static void dce4_audio_enable(struct radeon_device *rdev,
struct r600_audio_pin *pin,
u8 enable_mask)
{
u32 tmp = RREG32(AZ_HOT_PLUG_CONTROL);
if (!pin)
return;
if (enable_mask) {
tmp |= AUDIO_ENABLED;
if (enable_mask & 1)
tmp |= PIN0_AUDIO_ENABLED;
if (enable_mask & 2)
tmp |= PIN1_AUDIO_ENABLED;
if (enable_mask & 4)
tmp |= PIN2_AUDIO_ENABLED;
if (enable_mask & 8)
tmp |= PIN3_AUDIO_ENABLED;
} else {
tmp &= ~(AUDIO_ENABLED |
PIN0_AUDIO_ENABLED |
PIN1_AUDIO_ENABLED |
PIN2_AUDIO_ENABLED |
PIN3_AUDIO_ENABLED);
}
WREG32(AZ_HOT_PLUG_CONTROL, tmp);
}
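
dce4_audio_enable() (like the dce6 variant earlier) now takes a u8 pin bitmask instead of a bool: 0 clears AUDIO_ENABLED and every PIN*_AUDIO_ENABLED bit, while bits 0-3 enable the corresponding endpoints, which is why the setmode path below passes 0 before reprogramming and 0xf afterwards. For example:

dce4_audio_enable(rdev, dig->afmt->pin, 0);	/* mute all pins while reprogramming */
/* ... set up AFMT/HDMI registers ... */
dce4_audio_enable(rdev, dig->afmt->pin, 0xf);	/* re-enable pins 0-3 */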
/*
* update the N and CTS parameters for a given pixel clock rate
*/
@ -102,7 +133,7 @@ static void dce4_afmt_write_speaker_allocation(struct drm_encoder *encoder)
struct drm_connector *connector;
struct radeon_connector *radeon_connector = NULL;
u32 tmp;
u8 *sadb;
u8 *sadb = NULL;
int sad_count;
list_for_each_entry(connector, &encoder->dev->mode_config.connector_list, head) {
@ -318,10 +349,10 @@ void evergreen_hdmi_setmode(struct drm_encoder *encoder, struct drm_display_mode
/* disable audio prior to setting up hw */
if (ASIC_IS_DCE6(rdev)) {
dig->afmt->pin = dce6_audio_get_pin(rdev);
dce6_audio_enable(rdev, dig->afmt->pin, false);
dce6_audio_enable(rdev, dig->afmt->pin, 0);
} else {
dig->afmt->pin = r600_audio_get_pin(rdev);
r600_audio_enable(rdev, dig->afmt->pin, false);
dce4_audio_enable(rdev, dig->afmt->pin, 0);
}
evergreen_audio_set_dto(encoder, mode->clock);
@ -463,13 +494,15 @@ void evergreen_hdmi_setmode(struct drm_encoder *encoder, struct drm_display_mode
/* enable audio after to setting up hw */
if (ASIC_IS_DCE6(rdev))
dce6_audio_enable(rdev, dig->afmt->pin, true);
dce6_audio_enable(rdev, dig->afmt->pin, 1);
else
r600_audio_enable(rdev, dig->afmt->pin, true);
dce4_audio_enable(rdev, dig->afmt->pin, 0xf);
}
void evergreen_hdmi_enable(struct drm_encoder *encoder, bool enable)
{
struct drm_device *dev = encoder->dev;
struct radeon_device *rdev = dev->dev_private;
struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
struct radeon_encoder_atom_dig *dig = radeon_encoder->enc_priv;
@ -482,6 +515,14 @@ void evergreen_hdmi_enable(struct drm_encoder *encoder, bool enable)
if (!enable && !dig->afmt->enabled)
return;
if (!enable && dig->afmt->pin) {
if (ASIC_IS_DCE6(rdev))
dce6_audio_enable(rdev, dig->afmt->pin, 0);
else
dce4_audio_enable(rdev, dig->afmt->pin, 0);
dig->afmt->pin = NULL;
}
dig->afmt->enabled = enable;
DRM_DEBUG("%sabling HDMI interface @ 0x%04X for encoder 0x%x\n",

View File

@ -2800,6 +2800,8 @@ void kv_dpm_debugfs_print_current_performance_level(struct radeon_device *rdev,
tmp = (RREG32_SMC(SMU_VOLTAGE_STATUS) & SMU_VOLTAGE_CURRENT_LEVEL_MASK) >>
SMU_VOLTAGE_CURRENT_LEVEL_SHIFT;
vddc = kv_convert_8bit_index_to_voltage(rdev, (u16)tmp);
seq_printf(m, "uvd %sabled\n", pi->uvd_power_gated ? "dis" : "en");
seq_printf(m, "vce %sabled\n", pi->vce_power_gated ? "dis" : "en");
seq_printf(m, "power level %d sclk: %u vddc: %u\n",
current_index, sclk, vddc);
}


@ -29,7 +29,7 @@ struct drm_file *drm_file_handlers[256];
videomode_t usermode;
void cpu_detect();
void cpu_detect1();
int _stdcall display_handler(ioctl_t *io);
static char log[256];
@ -117,7 +117,7 @@ void ati_driver_thread()
asm volatile ("int $0x40"::"a"(-1));
}
u32_t __attribute__((externally_visible)) drvEntry(int action, char *cmdline)
u32 __attribute__((externally_visible)) drvEntry(int action, char *cmdline)
{
struct radeon_device *rdev = NULL;
@ -134,7 +134,7 @@ u32_t __attribute__((externally_visible)) drvEntry(int action, char *cmdline)
if( GetService("DISPLAY") != 0 )
return 0;
printf("Radeon v3.17-rc5 cmdline %s\n", cmdline);
printf("Radeon v3.19-rc1 cmdline %s\n", cmdline);
if( cmdline && *cmdline )
parse_cmdline(cmdline, &usermode, log, &radeon_modeset);
@ -145,7 +145,7 @@ u32_t __attribute__((externally_visible)) drvEntry(int action, char *cmdline)
return 0;
}
cpu_detect();
cpu_detect1();
err = enum_pci_devices();
if( unlikely(err != 0) )
@ -217,8 +217,8 @@ int r600_video_blit(uint64_t src_offset, int x, int y,
int _stdcall display_handler(ioctl_t *io)
{
int retval = -1;
u32_t *inp;
u32_t *outp;
u32 *inp;
u32 *outp;
inp = io->input;
outp = io->output;
@ -273,10 +273,10 @@ int _stdcall display_handler(ioctl_t *io)
#define PCI_CLASS_REVISION 0x08
#define PCI_CLASS_DISPLAY_VGA 0x0300
int pci_scan_filter(u32_t id, u32_t busnr, u32_t devfn)
int pci_scan_filter(u32 id, u32 busnr, u32 devfn)
{
u16_t vendor, device;
u32_t class;
u16 vendor, device;
u32 class;
int ret = 0;
vendor = id & 0xffff;

View File

@ -1366,6 +1366,7 @@ void cayman_fence_ring_emit(struct radeon_device *rdev,
void cayman_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
{
struct radeon_ring *ring = &rdev->ring[ib->ring];
unsigned vm_id = ib->vm ? ib->vm->ids[ib->ring].id : 0;
u32 cp_coher_cntl = PACKET3_FULL_CACHE_ENA | PACKET3_TC_ACTION_ENA |
PACKET3_SH_ACTION_ENA;
@ -1388,15 +1389,14 @@ void cayman_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
#endif
(ib->gpu_addr & 0xFFFFFFFC));
radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFF);
radeon_ring_write(ring, ib->length_dw |
(ib->vm ? (ib->vm->id << 24) : 0));
radeon_ring_write(ring, ib->length_dw | (vm_id << 24));
/* flush read cache over gart for this vmid */
radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
radeon_ring_write(ring, PACKET3_ENGINE_ME | cp_coher_cntl);
radeon_ring_write(ring, 0xFFFFFFFF);
radeon_ring_write(ring, 0);
radeon_ring_write(ring, ((ib->vm ? ib->vm->id : 0) << 24) | 10); /* poll interval */
radeon_ring_write(ring, (vm_id << 24) | 10); /* poll interval */
}
static void cayman_cp_enable(struct radeon_device *rdev, bool enable)
@ -1672,7 +1672,7 @@ static int cayman_cp_resume(struct radeon_device *rdev)
}
if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
return 0;
}
@ -1903,7 +1903,7 @@ int cayman_asic_reset(struct radeon_device *rdev)
if (reset_mask)
evergreen_gpu_pci_config_reset(rdev);
r600_set_bios_scratch_engine_hung(rdev, false);
r600_set_bios_scratch_engine_hung(rdev, false);
return 0;
}
@ -1943,16 +1943,16 @@ static int cayman_startup(struct radeon_device *rdev)
/* scratch needs to be initialized before MC */
r = r600_vram_scratch_init(rdev);
if (r)
return r;
return r;
evergreen_mc_program(rdev);
if (!(rdev->flags & RADEON_IS_IGP) && !rdev->pm.dpm_enabled) {
r = ni_mc_load_microcode(rdev);
if (r) {
DRM_ERROR("Failed to load MC firmware!\n");
return r;
}
r = ni_mc_load_microcode(rdev);
if (r) {
DRM_ERROR("Failed to load MC firmware!\n");
return r;
}
}
r = cayman_pcie_gart_enable(rdev);
@ -2379,7 +2379,7 @@ void cayman_vm_decode_fault(struct radeon_device *rdev,
default:
block = "unknown";
break;
}
}
printk("VM fault (0x%02x, vmid %d) at page %u, %s from %s (%d)\n",
protections, vmid, addr,
@ -2395,15 +2395,11 @@ void cayman_vm_decode_fault(struct radeon_device *rdev,
* Update the page table base and flush the VM TLB
* using the CP (cayman-si).
*/
void cayman_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
void cayman_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring,
unsigned vm_id, uint64_t pd_addr)
{
struct radeon_ring *ring = &rdev->ring[ridx];
if (vm == NULL)
return;
radeon_ring_write(ring, PACKET0(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2), 0));
radeon_ring_write(ring, vm->pd_gpu_addr >> 12);
radeon_ring_write(ring, PACKET0(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm_id << 2), 0));
radeon_ring_write(ring, pd_addr >> 12);
/* flush hdp cache */
radeon_ring_write(ring, PACKET0(HDP_MEM_COHERENCY_FLUSH_CNTL, 0));
@ -2411,7 +2407,7 @@ void cayman_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
/* bits 0-7 are the VM contexts0-7 */
radeon_ring_write(ring, PACKET0(VM_INVALIDATE_REQUEST, 0));
radeon_ring_write(ring, 1 << vm->id);
radeon_ring_write(ring, 1 << vm_id);
/* sync PFP to ME, otherwise we might get invalid PFP reads */
radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));

View File

@ -123,6 +123,7 @@ void cayman_dma_ring_ib_execute(struct radeon_device *rdev,
struct radeon_ib *ib)
{
struct radeon_ring *ring = &rdev->ring[ib->ring];
unsigned vm_id = ib->vm ? ib->vm->ids[ib->ring].id : 0;
if (rdev->wb.enabled) {
u32 next_rptr = ring->wptr + 4;
@ -140,7 +141,7 @@ void cayman_dma_ring_ib_execute(struct radeon_device *rdev,
*/
while ((ring->wptr & 7) != 5)
radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0));
radeon_ring_write(ring, DMA_IB_PACKET(DMA_PACKET_INDIRECT_BUFFER, ib->vm ? ib->vm->id : 0, 0));
radeon_ring_write(ring, DMA_IB_PACKET(DMA_PACKET_INDIRECT_BUFFER, vm_id, 0));
radeon_ring_write(ring, (ib->gpu_addr & 0xFFFFFFE0));
radeon_ring_write(ring, (ib->length_dw << 12) | (upper_32_bits(ib->gpu_addr) & 0xFF));
@ -446,16 +447,12 @@ void cayman_dma_vm_pad_ib(struct radeon_ib *ib)
ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0);
}
void cayman_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
void cayman_dma_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring,
unsigned vm_id, uint64_t pd_addr)
{
struct radeon_ring *ring = &rdev->ring[ridx];
if (vm == NULL)
return;
radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0));
radeon_ring_write(ring, (0xf << 16) | ((VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2));
radeon_ring_write(ring, vm->pd_gpu_addr >> 12);
radeon_ring_write(ring, (0xf << 16) | ((VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm_id << 2)) >> 2));
radeon_ring_write(ring, pd_addr >> 12);
/* flush hdp cache */
radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0));
@ -465,6 +462,6 @@ void cayman_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm
/* bits 0-7 are the VM contexts0-7 */
radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0));
radeon_ring_write(ring, (0xf << 16) | (VM_INVALIDATE_REQUEST >> 2));
radeon_ring_write(ring, 1 << vm->id);
radeon_ring_write(ring, 1 << vm_id);
}

View File

@ -23,6 +23,7 @@
#include "drmP.h"
#include "radeon.h"
#include "radeon_asic.h"
#include "nid.h"
#include "r600_dpm.h"
#include "ni_dpm.h"
@ -789,7 +790,6 @@ static void ni_apply_state_adjust_rules(struct radeon_device *rdev,
bool disable_mclk_switching;
u32 mclk;
u16 vddci;
u32 max_sclk_vddc, max_mclk_vddci, max_mclk_vddc;
int i;
if ((rdev->pm.dpm.new_active_crtc_count > 1) ||
@ -816,29 +816,6 @@ static void ni_apply_state_adjust_rules(struct radeon_device *rdev,
}
}
/* limit clocks to max supported clocks based on voltage dependency tables */
btc_get_max_clock_from_voltage_dependency_table(&rdev->pm.dpm.dyn_state.vddc_dependency_on_sclk,
&max_sclk_vddc);
btc_get_max_clock_from_voltage_dependency_table(&rdev->pm.dpm.dyn_state.vddci_dependency_on_mclk,
&max_mclk_vddci);
btc_get_max_clock_from_voltage_dependency_table(&rdev->pm.dpm.dyn_state.vddc_dependency_on_mclk,
&max_mclk_vddc);
for (i = 0; i < ps->performance_level_count; i++) {
if (max_sclk_vddc) {
if (ps->performance_levels[i].sclk > max_sclk_vddc)
ps->performance_levels[i].sclk = max_sclk_vddc;
}
if (max_mclk_vddci) {
if (ps->performance_levels[i].mclk > max_mclk_vddci)
ps->performance_levels[i].mclk = max_mclk_vddci;
}
if (max_mclk_vddc) {
if (ps->performance_levels[i].mclk > max_mclk_vddc)
ps->performance_levels[i].mclk = max_mclk_vddc;
}
}
/* XXX validate the min clocks required for display */
/* adjust low state */

View File

@ -1,20 +1,12 @@
#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/export.h>
#include <linux/mutex.h>
#include <linux/mod_devicetable.h>
#include <errno-base.h>
#include <pci.h>
#include <linux/slab.h>
#include <linux/pci.h>
#include <syscall.h>
static inline __attribute__((const))
bool is_power_of_2(unsigned long n)
{
return (n != 0 && ((n & (n - 1)) == 0));
}
extern int pci_scan_filter(u32_t id, u32_t busnr, u32_t devfn);
extern int pci_scan_filter(u32 id, u32 busnr, u32 devfn);
static LIST_HEAD(devices);
@ -39,9 +31,9 @@ static inline unsigned int pci_calc_resource_flags(unsigned int flags)
}
static u32_t pci_size(u32_t base, u32_t maxbase, u32_t mask)
static u32 pci_size(u32 base, u32 maxbase, u32 mask)
{
u32_t size = mask & maxbase; /* Find the significant bits */
u32 size = mask & maxbase; /* Find the significant bits */
if (!size)
return 0;
@ -58,9 +50,9 @@ static u32_t pci_size(u32_t base, u32_t maxbase, u32_t mask)
return size;
}
static u64_t pci_size64(u64_t base, u64_t maxbase, u64_t mask)
static u64 pci_size64(u64 base, u64 maxbase, u64 mask)
{
u64_t size = mask & maxbase; /* Find the significant bits */
u64 size = mask & maxbase; /* Find the significant bits */
if (!size)
return 0;
@ -77,7 +69,7 @@ static u64_t pci_size64(u64_t base, u64_t maxbase, u64_t mask)
return size;
}
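The sizing helpers above implement the standard PCI BAR probe: software writes all-ones to the BAR, reads it back, and the bits the device left writable encode the decode size. A minimal worked example of that arithmetic, assuming a 32-bit memory BAR (the readback value is illustrative, not from the driver):

u32 readback = 0xFFFF0000;                            /* value read back after writing ~0 */
u32 bits = readback & (u32)PCI_BASE_ADDRESS_MEM_MASK; /* drop the flag bits */
u32 size = bits & ~(bits - 1);                        /* isolate the lowest set bit */
/* size == 0x00010000: this BAR decodes 64 KiB of memory space */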
static inline int is_64bit_memory(u32_t mask)
static inline int is_64bit_memory(u32 mask)
{
if ((mask & (PCI_BASE_ADDRESS_SPACE|PCI_BASE_ADDRESS_MEM_TYPE_MASK)) ==
(PCI_BASE_ADDRESS_SPACE_MEMORY|PCI_BASE_ADDRESS_MEM_TYPE_64))
@ -87,15 +79,15 @@ static inline int is_64bit_memory(u32_t mask)
static void pci_read_bases(struct pci_dev *dev, unsigned int howmany, int rom)
{
u32_t pos, reg, next;
u32_t l, sz;
u32 pos, reg, next;
u32 l, sz;
struct resource *res;
for(pos=0; pos < howmany; pos = next)
{
u64_t l64;
u64_t sz64;
u32_t raw_sz;
u64 l64;
u64 sz64;
u32 raw_sz;
next = pos + 1;
@ -117,7 +109,7 @@ static void pci_read_bases(struct pci_dev *dev, unsigned int howmany, int rom)
if ((l & PCI_BASE_ADDRESS_SPACE) ==
PCI_BASE_ADDRESS_SPACE_MEMORY)
{
sz = pci_size(l, sz, (u32_t)PCI_BASE_ADDRESS_MEM_MASK);
sz = pci_size(l, sz, (u32)PCI_BASE_ADDRESS_MEM_MASK);
/*
* For 64bit prefetchable memory sz could be 0, if the
* real size is bigger than 4G, so we need to check
@ -139,14 +131,14 @@ static void pci_read_bases(struct pci_dev *dev, unsigned int howmany, int rom)
res->flags |= pci_calc_resource_flags(l);
if (is_64bit_memory(l))
{
u32_t szhi, lhi;
u32 szhi, lhi;
lhi = PciRead32(dev->busnr, dev->devfn, reg+4);
PciWrite32(dev->busnr, dev->devfn, reg+4, ~0);
szhi = PciRead32(dev->busnr, dev->devfn, reg+4);
PciWrite32(dev->busnr, dev->devfn, reg+4, lhi);
sz64 = ((u64_t)szhi << 32) | raw_sz;
l64 = ((u64_t)lhi << 32) | l;
sz64 = ((u64)szhi << 32) | raw_sz;
l64 = ((u64)lhi << 32) | l;
sz64 = pci_size64(l64, sz64, PCI_BASE_ADDRESS_MEM_MASK);
next++;
@ -170,7 +162,7 @@ static void pci_read_bases(struct pci_dev *dev, unsigned int howmany, int rom)
{
/* 64-bit wide address, treat as disabled */
PciWrite32(dev->busnr, dev->devfn, reg,
l & ~(u32_t)PCI_BASE_ADDRESS_MEM_MASK);
l & ~(u32)PCI_BASE_ADDRESS_MEM_MASK);
PciWrite32(dev->busnr, dev->devfn, reg+4, 0);
res->start = 0;
res->end = sz;
@ -194,7 +186,7 @@ static void pci_read_bases(struct pci_dev *dev, unsigned int howmany, int rom)
if (sz && sz != 0xffffffff)
{
sz = pci_size(l, sz, (u32_t)PCI_ROM_ADDRESS_MASK);
sz = pci_size(l, sz, (u32)PCI_ROM_ADDRESS_MASK);
if (sz)
{
@ -210,7 +202,7 @@ static void pci_read_bases(struct pci_dev *dev, unsigned int howmany, int rom)
static void pci_read_irq(struct pci_dev *dev)
{
u8_t irq;
u8 irq;
irq = PciRead8(dev->busnr, dev->devfn, PCI_INTERRUPT_PIN);
dev->pin = irq;
@ -222,7 +214,7 @@ static void pci_read_irq(struct pci_dev *dev)
int pci_setup_device(struct pci_dev *dev)
{
u32_t class;
u32 class;
class = PciRead32(dev->busnr, dev->devfn, PCI_CLASS_REVISION);
dev->revision = class & 0xff;
@ -254,7 +246,7 @@ int pci_setup_device(struct pci_dev *dev)
*/
if (class == PCI_CLASS_STORAGE_IDE)
{
u8_t progif;
u8 progif;
progif = PciRead8(dev->busnr, dev->devfn,PCI_CLASS_PROG);
if ((progif & 1) == 0)
@ -319,12 +311,12 @@ int pci_setup_device(struct pci_dev *dev)
return 0;
};
static pci_dev_t* pci_scan_device(u32_t busnr, int devfn)
static pci_dev_t* pci_scan_device(u32 busnr, int devfn)
{
pci_dev_t *dev;
u32_t id;
u8_t hdr;
u32 id;
u8 hdr;
int timeout = 10;
@ -380,7 +372,7 @@ static pci_dev_t* pci_scan_device(u32_t busnr, int devfn)
int pci_scan_slot(u32_t bus, int devfn)
int pci_scan_slot(u32 bus, int devfn)
{
int func, nr = 0;
@ -488,8 +480,8 @@ int pci_find_capability(struct pci_dev *dev, int cap)
int enum_pci_devices()
{
pci_dev_t *dev;
u32_t last_bus;
u32_t bus = 0 , devfn = 0;
u32 last_bus;
u32 bus = 0, devfn = 0;
last_bus = PciApi(1);
@ -672,11 +664,6 @@ void pci_iounmap(struct pci_dev *dev, void __iomem * addr)
}
struct pci_bus_region {
resource_size_t start;
resource_size_t end;
};
static inline void
pcibios_resource_to_bus(struct pci_dev *dev, struct pci_bus_region *region,
struct resource *res)
@ -775,21 +762,11 @@ void __iomem *pci_map_rom(struct pci_dev *pdev, size_t *size)
loff_t start;
void __iomem *rom;
// ENTER();
// dbgprintf("resource start %x end %x flags %x\n",
// res->start, res->end, res->flags);
/*
* IORESOURCE_ROM_SHADOW set on x86, x86_64 and IA64 supports legacy
* memory map if the VGA enable bit of the Bridge Control register is
* set for embedded VGA.
*/
start = (loff_t)0xC0000;
*size = 0x20000; /* cover C000:0 through E000:0 */
#if 0
if (res->flags & IORESOURCE_ROM_SHADOW) {
/* primary video rom always starts here */
start = (loff_t)0xC0000;
@ -801,21 +778,11 @@ void __iomem *pci_map_rom(struct pci_dev *pdev, size_t *size)
return (void __iomem *)(unsigned long)
pci_resource_start(pdev, PCI_ROM_RESOURCE);
} else {
/* assign the ROM an address if it doesn't have one */
// if (res->parent == NULL &&
// pci_assign_resource(pdev,PCI_ROM_RESOURCE))
return NULL;
// start = pci_resource_start(pdev, PCI_ROM_RESOURCE);
// *size = pci_resource_len(pdev, PCI_ROM_RESOURCE);
// if (*size == 0)
// return NULL;
start = (loff_t)0xC0000;
*size = 0x20000; /* cover C000:0 through E000:0 */
/* Enable ROM space decodes */
// if (pci_enable_rom(pdev))
// return NULL;
}
}
#endif
rom = ioremap(start, *size);
if (!rom) {
@ -833,7 +800,6 @@ void __iomem *pci_map_rom(struct pci_dev *pdev, size_t *size)
* True size is important if the ROM is going to be copied.
*/
*size = pci_get_rom_size(pdev, rom, *size);
// LEAVE();
return rom;
}
@ -861,6 +827,8 @@ static void __pci_set_master(struct pci_dev *dev, bool enable)
else
cmd = old_cmd & ~PCI_COMMAND_MASTER;
if (cmd != old_cmd) {
dbgprintf("%s bus mastering\n",
enable ? "enabling" : "disabling");
pci_write_config_word(dev, PCI_COMMAND, cmd);
}
dev->is_busmaster = enable;

View File

@ -56,6 +56,14 @@
#define PPSMC_STATEFLAG_DEEPSLEEP_THROTTLE 0x20
#define PPSMC_STATEFLAG_DEEPSLEEP_BYPASS 0x40
#define FDO_MODE_HARDWARE 0
#define FDO_MODE_PIECE_WISE_LINEAR 1
enum FAN_CONTROL {
FAN_CONTROL_FUZZY,
FAN_CONTROL_TABLE
};
#define PPSMC_Result_OK ((uint8_t)0x01)
#define PPSMC_Result_Failed ((uint8_t)0xFF)
@ -79,6 +87,8 @@ typedef uint8_t PPSMC_Result;
#define PPSMC_MSG_DisableCac ((uint8_t)0x54)
#define PPSMC_TDPClampingActive ((uint8_t)0x59)
#define PPSMC_TDPClampingInactive ((uint8_t)0x5A)
#define PPSMC_StartFanControl ((uint8_t)0x5B)
#define PPSMC_StopFanControl ((uint8_t)0x5C)
#define PPSMC_MSG_NoDisplay ((uint8_t)0x5D)
#define PPSMC_MSG_HasDisplay ((uint8_t)0x5E)
#define PPSMC_MSG_UVDPowerOFF ((uint8_t)0x60)
@ -106,6 +116,7 @@ typedef uint8_t PPSMC_Result;
#define PPSMC_MSG_SAMUDPM_SetEnabledMask ((uint16_t) 0x130)
#define PPSMC_MSG_MCLKDPM_ForceState ((uint16_t) 0x131)
#define PPSMC_MSG_MCLKDPM_NoForcedLevel ((uint16_t) 0x132)
#define PPSMC_MSG_Thermal_Cntl_Disable ((uint16_t) 0x133)
#define PPSMC_MSG_Voltage_Cntl_Disable ((uint16_t) 0x135)
#define PPSMC_MSG_PCIeDPM_Enable ((uint16_t) 0x136)
#define PPSMC_MSG_PCIeDPM_Disable ((uint16_t) 0x13d)
@ -149,6 +160,10 @@ typedef uint8_t PPSMC_Result;
#define PPSMC_MSG_MASTER_DeepSleep_ON ((uint16_t) 0x18F)
#define PPSMC_MSG_MASTER_DeepSleep_OFF ((uint16_t) 0x190)
#define PPSMC_MSG_Remove_DC_Clamp ((uint16_t) 0x191)
#define PPSMC_MSG_SetFanPwmMax ((uint16_t) 0x19A)
#define PPSMC_MSG_ENABLE_THERMAL_DPM ((uint16_t) 0x19C)
#define PPSMC_MSG_DISABLE_THERMAL_DPM ((uint16_t) 0x19D)
#define PPSMC_MSG_API_GetSclkFrequency ((uint16_t) 0x200)
#define PPSMC_MSG_API_GetMclkFrequency ((uint16_t) 0x201)
@ -157,10 +172,11 @@ typedef uint8_t PPSMC_Result;
#define PPSMC_MSG_DPM_Config ((uint32_t) 0x102)
#define PPSMC_MSG_DPM_ForceState ((uint32_t) 0x104)
#define PPSMC_MSG_PG_SIMD_Config ((uint32_t) 0x108)
#define PPSMC_MSG_DPM_N_LevelsDisabled ((uint32_t) 0x112)
#define PPSMC_MSG_Thermal_Cntl_Enable ((uint32_t) 0x10a)
#define PPSMC_MSG_Voltage_Cntl_Enable ((uint32_t) 0x109)
#define PPSMC_MSG_VCEPowerOFF ((uint32_t) 0x10e)
#define PPSMC_MSG_VCEPowerON ((uint32_t) 0x10f)
#define PPSMC_MSG_DPM_N_LevelsDisabled ((uint32_t) 0x112)
#define PPSMC_MSG_DCE_RemoveVoltageAdjustment ((uint32_t) 0x11d)
#define PPSMC_MSG_DCE_AllowVoltageAdjustment ((uint32_t) 0x11e)
#define PPSMC_MSG_EnableBAPM ((uint32_t) 0x120)

View File

@ -96,6 +96,14 @@ typedef struct _ATOM_PPLIB_FANTABLE2
USHORT usTMax; // The max temperature
} ATOM_PPLIB_FANTABLE2;
typedef struct _ATOM_PPLIB_FANTABLE3
{
ATOM_PPLIB_FANTABLE2 basicTable2;
UCHAR ucFanControlMode;
USHORT usFanPWMMax;
USHORT usFanOutputSensitivity;
} ATOM_PPLIB_FANTABLE3;
typedef struct _ATOM_PPLIB_EXTENDEDHEADER
{
USHORT usSize;

View File

@ -869,13 +869,14 @@ bool r100_semaphore_ring_emit(struct radeon_device *rdev,
return false;
}
int r100_copy_blit(struct radeon_device *rdev,
struct radeon_fence *r100_copy_blit(struct radeon_device *rdev,
uint64_t src_offset,
uint64_t dst_offset,
unsigned num_gpu_pages,
struct radeon_fence **fence)
struct reservation_object *resv)
{
struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
struct radeon_fence *fence;
uint32_t cur_pages;
uint32_t stride_bytes = RADEON_GPU_PAGE_SIZE;
uint32_t pitch;
@ -896,7 +897,7 @@ int r100_copy_blit(struct radeon_device *rdev,
r = radeon_ring_lock(rdev, ring, ndw);
if (r) {
DRM_ERROR("radeon: moving bo (%d) asking for %u dw.\n", r, ndw);
return -EINVAL;
return ERR_PTR(-EINVAL);
}
while (num_gpu_pages > 0) {
cur_pages = num_gpu_pages;
@ -936,11 +937,13 @@ int r100_copy_blit(struct radeon_device *rdev,
RADEON_WAIT_2D_IDLECLEAN |
RADEON_WAIT_HOST_IDLECLEAN |
RADEON_WAIT_DMA_GUI_IDLE);
if (fence) {
r = radeon_fence_emit(rdev, fence, RADEON_RING_TYPE_GFX_INDEX);
r = radeon_fence_emit(rdev, &fence, RADEON_RING_TYPE_GFX_INDEX);
if (r) {
radeon_ring_unlock_undo(rdev, ring);
return ERR_PTR(r);
}
radeon_ring_unlock_commit(rdev, ring, false);
return r;
return fence;
}
static int r100_cp_wait_for_idle(struct radeon_device *rdev)
@ -1247,7 +1250,7 @@ int r100_reloc_pitch_offset(struct radeon_cs_parser *p,
int r;
u32 tile_flags = 0;
u32 tmp;
struct radeon_cs_reloc *reloc;
struct radeon_bo_list *reloc;
u32 value;
r = radeon_cs_packet_next_reloc(p, &reloc, 0);
@ -1286,7 +1289,7 @@ int r100_packet3_load_vbpntr(struct radeon_cs_parser *p,
int idx)
{
unsigned c, i;
struct radeon_cs_reloc *reloc;
struct radeon_bo_list *reloc;
struct r100_cs_track *track;
int r = 0;
volatile uint32_t *ib;
@ -1535,7 +1538,7 @@ static int r100_packet0_check(struct radeon_cs_parser *p,
struct radeon_cs_packet *pkt,
unsigned idx, unsigned reg)
{
struct radeon_cs_reloc *reloc;
struct radeon_bo_list *reloc;
struct r100_cs_track *track;
volatile uint32_t *ib;
uint32_t tmp;
@ -1894,7 +1897,7 @@ int r100_cs_track_check_pkt3_indx_buffer(struct radeon_cs_parser *p,
static int r100_packet3_check(struct radeon_cs_parser *p,
struct radeon_cs_packet *pkt)
{
struct radeon_cs_reloc *reloc;
struct radeon_bo_list *reloc;
struct r100_cs_track *track;
unsigned idx;
volatile uint32_t *ib;
@ -2054,7 +2057,7 @@ int r100_cs_parse(struct radeon_cs_parser *p)
}
if (r)
return r;
} while (p->idx < p->chunks[p->chunk_ib_idx].length_dw);
} while (p->idx < p->chunk_ib->length_dw);
return 0;
}
@ -3200,6 +3203,9 @@ void r100_bandwidth_update(struct radeon_device *rdev)
uint32_t pixel_bytes1 = 0;
uint32_t pixel_bytes2 = 0;
if (!rdev->mode_info.mode_config_initialized)
return;
radeon_update_display_priority(rdev);
if (rdev->mode_info.crtcs[0]->base.enabled) {

View File

@ -80,13 +80,14 @@ static int r200_get_vtx_size_0(uint32_t vtx_fmt_0)
return vtx_size;
}
int r200_copy_dma(struct radeon_device *rdev,
struct radeon_fence *r200_copy_dma(struct radeon_device *rdev,
uint64_t src_offset,
uint64_t dst_offset,
unsigned num_gpu_pages,
struct radeon_fence **fence)
struct reservation_object *resv)
{
struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
struct radeon_fence *fence;
uint32_t size;
uint32_t cur_size;
int i, num_loops;
@ -98,7 +99,7 @@ int r200_copy_dma(struct radeon_device *rdev,
r = radeon_ring_lock(rdev, ring, num_loops * 4 + 64);
if (r) {
DRM_ERROR("radeon: moving bo (%d).\n", r);
return r;
return ERR_PTR(r);
}
/* Must wait for 2D idle & clean before DMA or hangs might happen */
radeon_ring_write(ring, PACKET0(RADEON_WAIT_UNTIL, 0));
@ -118,11 +119,13 @@ int r200_copy_dma(struct radeon_device *rdev,
}
radeon_ring_write(ring, PACKET0(RADEON_WAIT_UNTIL, 0));
radeon_ring_write(ring, RADEON_WAIT_DMA_GUI_IDLE);
if (fence) {
r = radeon_fence_emit(rdev, fence, RADEON_RING_TYPE_GFX_INDEX);
r = radeon_fence_emit(rdev, &fence, RADEON_RING_TYPE_GFX_INDEX);
if (r) {
radeon_ring_unlock_undo(rdev, ring);
return ERR_PTR(r);
}
radeon_ring_unlock_commit(rdev, ring, false);
return r;
return fence;
}
@ -143,7 +146,7 @@ int r200_packet0_check(struct radeon_cs_parser *p,
struct radeon_cs_packet *pkt,
unsigned idx, unsigned reg)
{
struct radeon_cs_reloc *reloc;
struct radeon_bo_list *reloc;
struct r100_cs_track *track;
volatile uint32_t *ib;
uint32_t tmp;

View File

@ -598,7 +598,7 @@ static int r300_packet0_check(struct radeon_cs_parser *p,
struct radeon_cs_packet *pkt,
unsigned idx, unsigned reg)
{
struct radeon_cs_reloc *reloc;
struct radeon_bo_list *reloc;
struct r100_cs_track *track;
volatile uint32_t *ib;
uint32_t tmp, tile_flags = 0;
@ -1142,7 +1142,7 @@ fail:
static int r300_packet3_check(struct radeon_cs_parser *p,
struct radeon_cs_packet *pkt)
{
struct radeon_cs_reloc *reloc;
struct radeon_bo_list *reloc;
struct r100_cs_track *track;
volatile uint32_t *ib;
unsigned idx;
@ -1283,7 +1283,7 @@ int r300_cs_parse(struct radeon_cs_parser *p)
if (r) {
return r;
}
} while (p->idx < p->chunks[p->chunk_ib_idx].length_dw);
} while (p->idx < p->chunk_ib->length_dw);
return 0;
}

View File

@ -122,6 +122,94 @@ u32 r600_get_xclk(struct radeon_device *rdev)
int r600_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
{
unsigned fb_div = 0, ref_div, vclk_div = 0, dclk_div = 0;
int r;
/* bypass vclk and dclk with bclk */
WREG32_P(CG_UPLL_FUNC_CNTL_2,
VCLK_SRC_SEL(1) | DCLK_SRC_SEL(1),
~(VCLK_SRC_SEL_MASK | DCLK_SRC_SEL_MASK));
/* assert BYPASS_EN, deassert UPLL_RESET, UPLL_SLEEP and UPLL_CTLREQ */
WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_BYPASS_EN_MASK, ~(
UPLL_RESET_MASK | UPLL_SLEEP_MASK | UPLL_CTLREQ_MASK));
if (rdev->family >= CHIP_RS780)
WREG32_P(GFX_MACRO_BYPASS_CNTL, UPLL_BYPASS_CNTL,
~UPLL_BYPASS_CNTL);
if (!vclk || !dclk) {
/* keep the Bypass mode, put PLL to sleep */
WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_SLEEP_MASK, ~UPLL_SLEEP_MASK);
return 0;
}
if (rdev->clock.spll.reference_freq == 10000)
ref_div = 34;
else
ref_div = 4;
r = radeon_uvd_calc_upll_dividers(rdev, vclk, dclk, 50000, 160000,
ref_div + 1, 0xFFF, 2, 30, ~0,
&fb_div, &vclk_div, &dclk_div);
if (r)
return r;
if (rdev->family >= CHIP_RV670 && rdev->family < CHIP_RS780)
fb_div >>= 1;
else
fb_div |= 1;
r = radeon_uvd_send_upll_ctlreq(rdev, CG_UPLL_FUNC_CNTL);
if (r)
return r;
/* assert PLL_RESET */
WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_RESET_MASK, ~UPLL_RESET_MASK);
/* For RS780 we have to choose ref clk */
if (rdev->family >= CHIP_RS780)
WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_REFCLK_SRC_SEL_MASK,
~UPLL_REFCLK_SRC_SEL_MASK);
/* set the required fb, ref and post divider values */
WREG32_P(CG_UPLL_FUNC_CNTL,
UPLL_FB_DIV(fb_div) |
UPLL_REF_DIV(ref_div),
~(UPLL_FB_DIV_MASK | UPLL_REF_DIV_MASK));
WREG32_P(CG_UPLL_FUNC_CNTL_2,
UPLL_SW_HILEN(vclk_div >> 1) |
UPLL_SW_LOLEN((vclk_div >> 1) + (vclk_div & 1)) |
UPLL_SW_HILEN2(dclk_div >> 1) |
UPLL_SW_LOLEN2((dclk_div >> 1) + (dclk_div & 1)) |
UPLL_DIVEN_MASK | UPLL_DIVEN2_MASK,
~UPLL_SW_MASK);
/* give the PLL some time to settle */
mdelay(15);
/* deassert PLL_RESET */
WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_RESET_MASK);
mdelay(15);
/* deassert BYPASS EN */
WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_BYPASS_EN_MASK);
if (rdev->family >= CHIP_RS780)
WREG32_P(GFX_MACRO_BYPASS_CNTL, 0, ~UPLL_BYPASS_CNTL);
r = radeon_uvd_send_upll_ctlreq(rdev, CG_UPLL_FUNC_CNTL);
if (r)
return r;
/* switch VCLK and DCLK selection */
WREG32_P(CG_UPLL_FUNC_CNTL_2,
VCLK_SRC_SEL(2) | DCLK_SRC_SEL(2),
~(VCLK_SRC_SEL_MASK | DCLK_SRC_SEL_MASK));
mdelay(100);
return 0;
}
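Nearly every step of the UPLL bring-up above is a WREG32_P() read-modify-write. As defined in radeon.h, the third argument is the set of bits to preserve, which is why the callers pass the complement of the field they are touching; roughly:

#define WREG32_P(reg, val, mask)                          \
	do {                                              \
		uint32_t tmp_ = RREG32(reg);              \
		tmp_ &= (mask);      /* keep masked bits */   \
		tmp_ |= ((val) & ~(mask)); /* set the rest */ \
		WREG32(reg, tmp_);                        \
	} while (0)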
@ -992,6 +1080,8 @@ static int r600_pcie_gart_enable(struct radeon_device *rdev)
WREG32(MC_VM_L1_TLB_MCB_WR_GFX_CNTL, tmp);
WREG32(MC_VM_L1_TLB_MCB_RD_PDMA_CNTL, tmp);
WREG32(MC_VM_L1_TLB_MCB_WR_PDMA_CNTL, tmp);
WREG32(MC_VM_L1_TLB_MCB_RD_UVD_CNTL, tmp);
WREG32(MC_VM_L1_TLB_MCB_WR_UVD_CNTL, tmp);
WREG32(MC_VM_L1_TLB_MCB_RD_SEM_CNTL, tmp | ENABLE_SEMAPHORE_MODE);
WREG32(MC_VM_L1_TLB_MCB_WR_SEM_CNTL, tmp | ENABLE_SEMAPHORE_MODE);
WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
@ -1042,6 +1132,8 @@ static void r600_pcie_gart_disable(struct radeon_device *rdev)
WREG32(MC_VM_L1_TLB_MCB_WR_SYS_CNTL, tmp);
WREG32(MC_VM_L1_TLB_MCB_RD_HDP_CNTL, tmp);
WREG32(MC_VM_L1_TLB_MCB_WR_HDP_CNTL, tmp);
WREG32(MC_VM_L1_TLB_MCB_RD_UVD_CNTL, tmp);
WREG32(MC_VM_L1_TLB_MCB_WR_UVD_CNTL, tmp);
radeon_gart_table_vram_unpin(rdev);
}
@ -1338,7 +1430,7 @@ int r600_vram_scratch_init(struct radeon_device *rdev)
if (rdev->vram_scratch.robj == NULL) {
r = radeon_bo_create(rdev, RADEON_GPU_PAGE_SIZE,
PAGE_SIZE, true, RADEON_GEM_DOMAIN_VRAM,
0, NULL, &rdev->vram_scratch.robj);
0, NULL, NULL, &rdev->vram_scratch.robj);
if (r) {
return r;
}
@ -2792,35 +2884,32 @@ bool r600_semaphore_ring_emit(struct radeon_device *rdev,
* Used by the radeon ttm implementation to move pages if
* registered as the asic copy callback.
*/
int r600_copy_cpdma(struct radeon_device *rdev,
struct radeon_fence *r600_copy_cpdma(struct radeon_device *rdev,
uint64_t src_offset, uint64_t dst_offset,
unsigned num_gpu_pages,
struct radeon_fence **fence)
struct reservation_object *resv)
{
struct radeon_semaphore *sem = NULL;
struct radeon_fence *fence;
struct radeon_sync sync;
int ring_index = rdev->asic->copy.blit_ring_index;
struct radeon_ring *ring = &rdev->ring[ring_index];
u32 size_in_bytes, cur_size_in_bytes, tmp;
int i, num_loops;
int r = 0;
r = radeon_semaphore_create(rdev, &sem);
if (r) {
DRM_ERROR("radeon: moving bo (%d).\n", r);
return r;
}
radeon_sync_create(&sync);
size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
num_loops = DIV_ROUND_UP(size_in_bytes, 0x1fffff);
r = radeon_ring_lock(rdev, ring, num_loops * 6 + 24);
if (r) {
DRM_ERROR("radeon: moving bo (%d).\n", r);
radeon_semaphore_free(rdev, &sem, NULL);
return r;
radeon_sync_free(rdev, &sync, NULL);
return ERR_PTR(r);
}
radeon_semaphore_sync_to(sem, *fence);
radeon_semaphore_sync_rings(rdev, sem, ring->idx);
radeon_sync_resv(rdev, &sync, resv, false);
radeon_sync_rings(rdev, &sync, ring->idx);
radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
radeon_ring_write(ring, (WAIT_UNTIL - PACKET3_SET_CONFIG_REG_OFFSET) >> 2);
@ -2846,17 +2935,17 @@ int r600_copy_cpdma(struct radeon_device *rdev,
radeon_ring_write(ring, (WAIT_UNTIL - PACKET3_SET_CONFIG_REG_OFFSET) >> 2);
radeon_ring_write(ring, WAIT_CP_DMA_IDLE_bit);
r = radeon_fence_emit(rdev, fence, ring->idx);
r = radeon_fence_emit(rdev, &fence, ring->idx);
if (r) {
radeon_ring_unlock_undo(rdev, ring);
radeon_semaphore_free(rdev, &sem, NULL);
return r;
radeon_sync_free(rdev, &sync, NULL);
return ERR_PTR(r);
}
radeon_ring_unlock_commit(rdev, ring, false);
radeon_semaphore_free(rdev, &sem, *fence);
radeon_sync_free(rdev, &sync, fence);
return r;
return fence;
}
int r600_set_surface_reg(struct radeon_device *rdev, int reg,
@ -3171,7 +3260,7 @@ int r600_ih_ring_alloc(struct radeon_device *rdev)
r = radeon_bo_create(rdev, rdev->ih.ring_size,
PAGE_SIZE, true,
RADEON_GEM_DOMAIN_GTT, 0,
NULL, &rdev->ih.ring_obj);
NULL, NULL, &rdev->ih.ring_obj);
if (r) {
DRM_ERROR("radeon: failed to create ih ring buffer (%d).\n", r);
return r;

View File

@ -969,7 +969,7 @@ static int r600_cs_parse_packet0(struct radeon_cs_parser *p,
static int r600_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx)
{
struct r600_cs_track *track = (struct r600_cs_track *)p->track;
struct radeon_cs_reloc *reloc;
struct radeon_bo_list *reloc;
u32 m, i, tmp, *ib;
int r;
@ -1626,7 +1626,7 @@ static bool r600_is_safe_reg(struct radeon_cs_parser *p, u32 reg, u32 idx)
static int r600_packet3_check(struct radeon_cs_parser *p,
struct radeon_cs_packet *pkt)
{
struct radeon_cs_reloc *reloc;
struct radeon_bo_list *reloc;
struct r600_cs_track *track;
volatile u32 *ib;
unsigned idx;
@ -2316,7 +2316,7 @@ int r600_cs_parse(struct radeon_cs_parser *p)
p->track = NULL;
return r;
}
} while (p->idx < p->chunks[p->chunk_ib_idx].length_dw);
} while (p->idx < p->chunk_ib->length_dw);
#if 0
for (r = 0; r < p->ib.length_dw; r++) {
printk(KERN_INFO "%05d 0x%08X\n", r, p->ib.ptr[r]);
@ -2351,10 +2351,10 @@ static void r600_cs_parser_fini(struct radeon_cs_parser *parser, int error)
static int r600_cs_parser_relocs_legacy(struct radeon_cs_parser *p)
{
if (p->chunk_relocs_idx == -1) {
if (p->chunk_relocs == NULL) {
return 0;
}
p->relocs = kzalloc(sizeof(struct radeon_cs_reloc), GFP_KERNEL);
p->relocs = kzalloc(sizeof(struct radeon_bo_list), GFP_KERNEL);
if (p->relocs == NULL) {
return -ENOMEM;
}
@ -2398,7 +2398,7 @@ int r600_cs_legacy(struct drm_device *dev, void *data, struct drm_file *filp,
/* Copy the packet into the IB, the parser will read from the
* input memory (cached) and write to the IB (which can be
* uncached). */
ib_chunk = &parser.chunks[parser.chunk_ib_idx];
ib_chunk = parser.chunk_ib;
parser.ib.length_dw = ib_chunk->length_dw;
*l = parser.ib.length_dw;
if (copy_from_user(ib, ib_chunk->user_ptr, ib_chunk->length_dw * 4)) {
@ -2435,24 +2435,24 @@ void r600_cs_legacy_init(void)
* GPU offset using the provided start.
**/
int r600_dma_cs_next_reloc(struct radeon_cs_parser *p,
struct radeon_cs_reloc **cs_reloc)
struct radeon_bo_list **cs_reloc)
{
struct radeon_cs_chunk *relocs_chunk;
unsigned idx;
*cs_reloc = NULL;
if (p->chunk_relocs_idx == -1) {
if (p->chunk_relocs == NULL) {
DRM_ERROR("No relocation chunk !\n");
return -EINVAL;
}
relocs_chunk = &p->chunks[p->chunk_relocs_idx];
relocs_chunk = p->chunk_relocs;
idx = p->dma_reloc_idx;
if (idx >= p->nrelocs) {
DRM_ERROR("Relocs at %d after relocations chunk end %d !\n",
idx, p->nrelocs);
return -EINVAL;
}
*cs_reloc = p->relocs_ptr[idx];
*cs_reloc = &p->relocs[idx];
p->dma_reloc_idx++;
return 0;
}
@ -2472,8 +2472,8 @@ int r600_dma_cs_next_reloc(struct radeon_cs_parser *p,
**/
int r600_dma_cs_parse(struct radeon_cs_parser *p)
{
struct radeon_cs_chunk *ib_chunk = &p->chunks[p->chunk_ib_idx];
struct radeon_cs_reloc *src_reloc, *dst_reloc;
struct radeon_cs_chunk *ib_chunk = p->chunk_ib;
struct radeon_bo_list *src_reloc, *dst_reloc;
u32 header, cmd, count, tiled;
volatile u32 *ib = p->ib.ptr;
u32 idx, idx_value;
@ -2619,7 +2619,7 @@ int r600_dma_cs_parse(struct radeon_cs_parser *p)
DRM_ERROR("Unknown packet type %d at %d !\n", cmd, idx);
return -EINVAL;
}
} while (p->idx < p->chunks[p->chunk_ib_idx].length_dw);
} while (p->idx < p->chunk_ib->length_dw);
#if 0
for (r = 0; r < p->ib->length_dw; r++) {
printk(KERN_INFO "%05d 0x%08X\n", r, p->ib.ptr[r]);

View File

@ -338,17 +338,17 @@ int r600_dma_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
{
struct radeon_ib ib;
unsigned i;
unsigned index;
int r;
void __iomem *ptr = (void *)rdev->vram_scratch.ptr;
u32 tmp = 0;
u64 gpu_addr;
if (!ptr) {
DRM_ERROR("invalid vram scratch pointer\n");
return -EINVAL;
}
if (ring->idx == R600_RING_TYPE_DMA_INDEX)
index = R600_WB_DMA_RING_TEST_OFFSET;
else
index = CAYMAN_WB_DMA1_RING_TEST_OFFSET;
tmp = 0xCAFEDEAD;
writel(tmp, ptr);
gpu_addr = rdev->wb.gpu_addr + index;
r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
if (r) {
@ -357,8 +357,8 @@ int r600_dma_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
}
ib.ptr[0] = DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 1);
ib.ptr[1] = rdev->vram_scratch.gpu_addr & 0xfffffffc;
ib.ptr[2] = upper_32_bits(rdev->vram_scratch.gpu_addr) & 0xff;
ib.ptr[1] = lower_32_bits(gpu_addr);
ib.ptr[2] = upper_32_bits(gpu_addr) & 0xff;
ib.ptr[3] = 0xDEADBEEF;
ib.length_dw = 4;
@ -374,7 +374,7 @@ int r600_dma_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
return r;
}
for (i = 0; i < rdev->usec_timeout; i++) {
tmp = readl(ptr);
tmp = le32_to_cpu(rdev->wb.wb[index/4]);
if (tmp == 0xDEADBEEF)
break;
DRM_UDELAY(1);
@ -430,41 +430,38 @@ void r600_dma_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
* @src_offset: src GPU address
* @dst_offset: dst GPU address
* @num_gpu_pages: number of GPU pages to xfer
* @fence: radeon fence object
* @resv: reservation object to sync to
*
* Copy GPU paging using the DMA engine (r6xx).
* Used by the radeon ttm implementation to move pages if
* registered as the asic copy callback.
*/
int r600_copy_dma(struct radeon_device *rdev,
struct radeon_fence *r600_copy_dma(struct radeon_device *rdev,
uint64_t src_offset, uint64_t dst_offset,
unsigned num_gpu_pages,
struct radeon_fence **fence)
struct reservation_object *resv)
{
struct radeon_semaphore *sem = NULL;
struct radeon_fence *fence;
struct radeon_sync sync;
int ring_index = rdev->asic->copy.dma_ring_index;
struct radeon_ring *ring = &rdev->ring[ring_index];
u32 size_in_dw, cur_size_in_dw;
int i, num_loops;
int r = 0;
r = radeon_semaphore_create(rdev, &sem);
if (r) {
DRM_ERROR("radeon: moving bo (%d).\n", r);
return r;
}
radeon_sync_create(&sync);
size_in_dw = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT) / 4;
num_loops = DIV_ROUND_UP(size_in_dw, 0xFFFE);
r = radeon_ring_lock(rdev, ring, num_loops * 4 + 8);
if (r) {
DRM_ERROR("radeon: moving bo (%d).\n", r);
radeon_semaphore_free(rdev, &sem, NULL);
return r;
radeon_sync_free(rdev, &sync, NULL);
return ERR_PTR(r);
}
radeon_semaphore_sync_to(sem, *fence);
radeon_semaphore_sync_rings(rdev, sem, ring->idx);
radeon_sync_resv(rdev, &sync, resv, false);
radeon_sync_rings(rdev, &sync, ring->idx);
for (i = 0; i < num_loops; i++) {
cur_size_in_dw = size_in_dw;
@ -480,15 +477,15 @@ int r600_copy_dma(struct radeon_device *rdev,
dst_offset += cur_size_in_dw * 4;
}
r = radeon_fence_emit(rdev, fence, ring->idx);
r = radeon_fence_emit(rdev, &fence, ring->idx);
if (r) {
radeon_ring_unlock_undo(rdev, ring);
radeon_semaphore_free(rdev, &sem, NULL);
return r;
radeon_sync_free(rdev, &sync, NULL);
return ERR_PTR(r);
}
radeon_ring_unlock_commit(rdev, ring, false);
radeon_semaphore_free(rdev, &sem, *fence);
radeon_sync_free(rdev, &sync, fence);
return r;
return fence;
}
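With the copy callbacks now returning the fence directly (or an ERR_PTR on failure) instead of filling a **fence out-parameter, a caller-side sketch looks like this (offsets and page count are placeholders):

struct radeon_fence *fence;
int r;

fence = radeon_copy_dma(rdev, src_offset, dst_offset, num_gpu_pages, resv);
if (IS_ERR(fence))
	return PTR_ERR(fence);
/* wait for the DMA engine to signal, then drop our reference */
r = radeon_fence_wait(fence, false);
radeon_fence_unref(&fence);
return r;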

View File

@ -24,6 +24,7 @@
#include "drmP.h"
#include "radeon.h"
#include "radeon_asic.h"
#include "r600d.h"
#include "r600_dpm.h"
#include "atom.h"
@ -810,6 +811,7 @@ union power_info {
union fan_info {
struct _ATOM_PPLIB_FANTABLE fan;
struct _ATOM_PPLIB_FANTABLE2 fan2;
struct _ATOM_PPLIB_FANTABLE3 fan3;
};
static int r600_parse_clk_voltage_dep_table(struct radeon_clock_voltage_dependency_table *radeon_table,
@ -899,6 +901,14 @@ int r600_parse_extended_power_table(struct radeon_device *rdev)
else
rdev->pm.dpm.fan.t_max = 10900;
rdev->pm.dpm.fan.cycle_delay = 100000;
if (fan_info->fan.ucFanTableFormat >= 3) {
rdev->pm.dpm.fan.control_mode = fan_info->fan3.ucFanControlMode;
rdev->pm.dpm.fan.default_max_fan_pwm =
le16_to_cpu(fan_info->fan3.usFanPWMMax);
rdev->pm.dpm.fan.default_fan_output_sensitivity = 4836;
rdev->pm.dpm.fan.fan_output_sensitivity =
le16_to_cpu(fan_info->fan3.usFanOutputSensitivity);
}
rdev->pm.dpm.fan.ucode_fan_control = true;
}
}
@ -1255,7 +1265,7 @@ int r600_parse_extended_power_table(struct radeon_device *rdev)
(mode_info->atom_context->bios + data_offset +
le16_to_cpu(ext_hdr->usPowerTuneTableOffset));
rdev->pm.dpm.dyn_state.cac_tdp_table->maximum_power_delivery_limit =
ppt->usMaximumPowerDeliveryLimit;
le16_to_cpu(ppt->usMaximumPowerDeliveryLimit);
pt = &ppt->power_tune_table;
} else {
ATOM_PPLIB_POWERTUNE_Table *ppt = (ATOM_PPLIB_POWERTUNE_Table *)

View File

@ -96,6 +96,9 @@
#define R600_TEMP_RANGE_MIN (90 * 1000)
#define R600_TEMP_RANGE_MAX (120 * 1000)
#define FDO_PWM_MODE_STATIC 1
#define FDO_PWM_MODE_STATIC_RPM 5
enum r600_power_level {
R600_POWER_LEVEL_LOW = 0,
R600_POWER_LEVEL_MEDIUM = 1,

View File

@ -70,6 +70,169 @@ static const struct radeon_hdmi_acr r600_hdmi_predefined_acr[] = {
};
/*
* check if the chipset is supported
*/
static int r600_audio_chipset_supported(struct radeon_device *rdev)
{
return ASIC_IS_DCE2(rdev) && !ASIC_IS_NODCE(rdev);
}
static struct r600_audio_pin r600_audio_status(struct radeon_device *rdev)
{
struct r600_audio_pin status;
uint32_t value;
value = RREG32(R600_AUDIO_RATE_BPS_CHANNEL);
/* number of channels */
status.channels = (value & 0x7) + 1;
/* bits per sample */
switch ((value & 0xF0) >> 4) {
case 0x0:
status.bits_per_sample = 8;
break;
case 0x1:
status.bits_per_sample = 16;
break;
case 0x2:
status.bits_per_sample = 20;
break;
case 0x3:
status.bits_per_sample = 24;
break;
case 0x4:
status.bits_per_sample = 32;
break;
default:
dev_err(rdev->dev, "Unknown bits per sample 0x%x, using 16\n",
(int)value);
status.bits_per_sample = 16;
}
/* current sampling rate in Hz */
if (value & 0x4000)
status.rate = 44100;
else
status.rate = 48000;
status.rate *= ((value >> 11) & 0x7) + 1;
status.rate /= ((value >> 8) & 0x7) + 1;
value = RREG32(R600_AUDIO_STATUS_BITS);
/* iec 60958 status bits */
status.status_bits = value & 0xff;
/* iec 60958 category code */
status.category_code = (value >> 8) & 0xff;
return status;
}
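The sample-rate decode above packs a base-rate select (bit 14), a multiplier (bits 13:11, stored minus one) and a divider (bits 10:8, stored minus one) into a single register read. A quick worked example using those same bit positions:

u32 value = (1 << 14) | (1 << 11);            /* 44.1 kHz base, multiplier field = 1 */
u32 rate = (value & 0x4000) ? 44100 : 48000;  /* -> 44100 */
rate *= ((value >> 11) & 0x7) + 1;            /* x2 -> 88200 */
rate /= ((value >> 8) & 0x7) + 1;             /* /1 -> 88200 Hz */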
/*
* update all hdmi interfaces with current audio parameters
*/
void r600_audio_update_hdmi(struct work_struct *work)
{
struct radeon_device *rdev = container_of(work, struct radeon_device,
audio_work);
struct drm_device *dev = rdev->ddev;
struct r600_audio_pin audio_status = r600_audio_status(rdev);
struct drm_encoder *encoder;
bool changed = false;
if (rdev->audio.pin[0].channels != audio_status.channels ||
rdev->audio.pin[0].rate != audio_status.rate ||
rdev->audio.pin[0].bits_per_sample != audio_status.bits_per_sample ||
rdev->audio.pin[0].status_bits != audio_status.status_bits ||
rdev->audio.pin[0].category_code != audio_status.category_code) {
rdev->audio.pin[0] = audio_status;
changed = true;
}
list_for_each_entry(encoder, &dev->mode_config.encoder_list, head) {
if (!radeon_encoder_is_digital(encoder))
continue;
if (changed || r600_hdmi_buffer_status_changed(encoder))
r600_hdmi_update_audio_settings(encoder);
}
}
/* enable the audio stream */
void r600_audio_enable(struct radeon_device *rdev,
struct r600_audio_pin *pin,
u8 enable_mask)
{
u32 tmp = RREG32(AZ_HOT_PLUG_CONTROL);
if (!pin)
return;
if (enable_mask) {
tmp |= AUDIO_ENABLED;
if (enable_mask & 1)
tmp |= PIN0_AUDIO_ENABLED;
if (enable_mask & 2)
tmp |= PIN1_AUDIO_ENABLED;
if (enable_mask & 4)
tmp |= PIN2_AUDIO_ENABLED;
if (enable_mask & 8)
tmp |= PIN3_AUDIO_ENABLED;
} else {
tmp &= ~(AUDIO_ENABLED |
PIN0_AUDIO_ENABLED |
PIN1_AUDIO_ENABLED |
PIN2_AUDIO_ENABLED |
PIN3_AUDIO_ENABLED);
}
WREG32(AZ_HOT_PLUG_CONTROL, tmp);
}
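The enable_mask parameter that replaces the old bool is a per-pin bitmask: bit n drives PINn_AUDIO_ENABLED, and zero tears everything down. The call sites in this patch use it like so (the single-pin case is illustrative):

r600_audio_enable(rdev, pin, 0xf);  /* enable all four pins */
r600_audio_enable(rdev, pin, 0x1);  /* enable pin 0 only */
r600_audio_enable(rdev, pin, 0);    /* disable audio entirely */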
/*
* initialize the audio vars
*/
int r600_audio_init(struct radeon_device *rdev)
{
if (!radeon_audio || !r600_audio_chipset_supported(rdev))
return 0;
rdev->audio.enabled = true;
rdev->audio.num_pins = 1;
rdev->audio.pin[0].channels = -1;
rdev->audio.pin[0].rate = -1;
rdev->audio.pin[0].bits_per_sample = -1;
rdev->audio.pin[0].status_bits = 0;
rdev->audio.pin[0].category_code = 0;
rdev->audio.pin[0].id = 0;
/* disable audio. it will be set up later */
r600_audio_enable(rdev, &rdev->audio.pin[0], 0);
return 0;
}
/*
* release the audio timer
* TODO: How to do this correctly on SMP systems?
*/
void r600_audio_fini(struct radeon_device *rdev)
{
if (!rdev->audio.enabled)
return;
r600_audio_enable(rdev, &rdev->audio.pin[0], 0);
rdev->audio.enabled = false;
}
struct r600_audio_pin *r600_audio_get_pin(struct radeon_device *rdev)
{
/* only one pin on 6xx-NI */
return &rdev->audio.pin[0];
}
/*
* calculate CTS and N values if they are not found in the table
*/
@ -356,7 +519,7 @@ void r600_hdmi_setmode(struct drm_encoder *encoder, struct drm_display_mode *mod
/* disable audio prior to setting up hw */
dig->afmt->pin = r600_audio_get_pin(rdev);
r600_audio_enable(rdev, dig->afmt->pin, false);
r600_audio_enable(rdev, dig->afmt->pin, 0xf);
r600_audio_set_dto(encoder, mode->clock);
@ -442,7 +605,7 @@ void r600_hdmi_setmode(struct drm_encoder *encoder, struct drm_display_mode *mod
WREG32(HDMI0_RAMP_CONTROL3 + offset, 0x00000001);
/* enable audio after setting up hw */
r600_audio_enable(rdev, dig->afmt->pin, true);
r600_audio_enable(rdev, dig->afmt->pin, 0xf);
}
/**
@ -527,6 +690,11 @@ void r600_hdmi_enable(struct drm_encoder *encoder, bool enable)
if (!enable && !dig->afmt->enabled)
return;
if (!enable && dig->afmt->pin) {
r600_audio_enable(rdev, dig->afmt->pin, 0);
dig->afmt->pin = NULL;
}
/* Older chipsets require setting HDMI and routing manually */
if (!ASIC_IS_DCE3(rdev)) {
if (enable)

View File

@ -323,11 +323,12 @@
#define HDP_TILING_CONFIG 0x2F3C
#define HDP_DEBUG1 0x2F34
#define MC_CONFIG 0x2000
#define MC_VM_AGP_TOP 0x2184
#define MC_VM_AGP_BOT 0x2188
#define MC_VM_AGP_BASE 0x218C
#define MC_VM_FB_LOCATION 0x2180
#define MC_VM_L1_TLB_MCD_RD_A_CNTL 0x219C
#define MC_VM_L1_TLB_MCB_RD_UVD_CNTL 0x2124
#define ENABLE_L1_TLB (1 << 0)
#define ENABLE_L1_FRAGMENT_PROCESSING (1 << 1)
#define ENABLE_L1_STRICT_ORDERING (1 << 2)
@ -347,12 +348,14 @@
#define EFFECTIVE_L1_QUEUE_SIZE(x) (((x) & 7) << 15)
#define EFFECTIVE_L1_QUEUE_SIZE_MASK 0x00038000
#define EFFECTIVE_L1_QUEUE_SIZE_SHIFT 15
#define MC_VM_L1_TLB_MCD_RD_A_CNTL 0x219C
#define MC_VM_L1_TLB_MCD_RD_B_CNTL 0x21A0
#define MC_VM_L1_TLB_MCB_RD_GFX_CNTL 0x21FC
#define MC_VM_L1_TLB_MCB_RD_HDP_CNTL 0x2204
#define MC_VM_L1_TLB_MCB_RD_PDMA_CNTL 0x2208
#define MC_VM_L1_TLB_MCB_RD_SEM_CNTL 0x220C
#define MC_VM_L1_TLB_MCB_RD_SYS_CNTL 0x2200
#define MC_VM_L1_TLB_MCB_WR_UVD_CNTL 0x212c
#define MC_VM_L1_TLB_MCD_WR_A_CNTL 0x21A4
#define MC_VM_L1_TLB_MCD_WR_B_CNTL 0x21A8
#define MC_VM_L1_TLB_MCB_WR_GFX_CNTL 0x2210
@ -366,6 +369,8 @@
#define MC_VM_SYSTEM_APERTURE_HIGH_ADDR 0x2194
#define MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR 0x2198
#define RS_DQ_RD_RET_CONF 0x2348
#define PA_CL_ENHANCE 0x8A14
#define CLIP_VTX_REORDER_ENA (1 << 0)
#define NUM_CLIP_SEQ(x) ((x) << 1)
@ -922,6 +927,23 @@
# define TARGET_LINK_SPEED_MASK (0xf << 0)
# define SELECTABLE_DEEMPHASIS (1 << 6)
/* Audio */
#define AZ_HOT_PLUG_CONTROL 0x7300
# define AZ_FORCE_CODEC_WAKE (1 << 0)
# define JACK_DETECTION_ENABLE (1 << 4)
# define UNSOLICITED_RESPONSE_ENABLE (1 << 8)
# define CODEC_HOT_PLUG_ENABLE (1 << 12)
# define AUDIO_ENABLED (1 << 31)
/* DCE3 adds */
# define PIN0_JACK_DETECTION_ENABLE (1 << 4)
# define PIN1_JACK_DETECTION_ENABLE (1 << 5)
# define PIN2_JACK_DETECTION_ENABLE (1 << 6)
# define PIN3_JACK_DETECTION_ENABLE (1 << 7)
# define PIN0_AUDIO_ENABLED (1 << 24)
# define PIN1_AUDIO_ENABLED (1 << 25)
# define PIN2_AUDIO_ENABLED (1 << 26)
# define PIN3_AUDIO_ENABLED (1 << 27)
/* Audio clocks DCE 2.0/3.0 */
#define AUDIO_DTO 0x7340
# define AUDIO_DTO_PHASE(x) (((x) & 0xffff) << 0)
@ -1476,6 +1498,7 @@
#define UVD_CGC_GATE 0xf4a8
#define UVD_LMI_CTRL2 0xf4f4
#define UVD_MASTINT_EN 0xf500
#define UVD_FW_START 0xf51C
#define UVD_LMI_ADDR_EXT 0xf594
#define UVD_LMI_CTRL 0xf598
#define UVD_LMI_SWAP_CNTL 0xf5b4
@ -1488,6 +1511,13 @@
#define UVD_MPC_SET_MUX 0xf5f4
#define UVD_MPC_SET_ALU 0xf5f8
#define UVD_VCPU_CACHE_OFFSET0 0xf608
#define UVD_VCPU_CACHE_SIZE0 0xf60c
#define UVD_VCPU_CACHE_OFFSET1 0xf610
#define UVD_VCPU_CACHE_SIZE1 0xf614
#define UVD_VCPU_CACHE_OFFSET2 0xf618
#define UVD_VCPU_CACHE_SIZE2 0xf61c
#define UVD_VCPU_CNTL 0xf660
#define UVD_SOFT_RESET 0xf680
#define RBC_SOFT_RESET (1<<0)
@ -1517,9 +1547,35 @@
#define UVD_CONTEXT_ID 0xf6f4
/* rs780 only */
#define GFX_MACRO_BYPASS_CNTL 0x30c0
#define SPLL_BYPASS_CNTL (1 << 0)
#define UPLL_BYPASS_CNTL (1 << 1)
#define CG_UPLL_FUNC_CNTL 0x7e0
# define UPLL_RESET_MASK 0x00000001
# define UPLL_SLEEP_MASK 0x00000002
# define UPLL_BYPASS_EN_MASK 0x00000004
# define UPLL_CTLREQ_MASK 0x00000008
# define UPLL_FB_DIV(x) ((x) << 4)
# define UPLL_FB_DIV_MASK 0x0000FFF0
# define UPLL_REF_DIV(x) ((x) << 16)
# define UPLL_REF_DIV_MASK 0x003F0000
# define UPLL_REFCLK_SRC_SEL_MASK 0x20000000
# define UPLL_CTLACK_MASK 0x40000000
# define UPLL_CTLACK2_MASK 0x80000000
#define CG_UPLL_FUNC_CNTL_2 0x7e4
# define UPLL_SW_HILEN(x) ((x) << 0)
# define UPLL_SW_LOLEN(x) ((x) << 4)
# define UPLL_SW_HILEN2(x) ((x) << 8)
# define UPLL_SW_LOLEN2(x) ((x) << 12)
# define UPLL_DIVEN_MASK 0x00010000
# define UPLL_DIVEN2_MASK 0x00020000
# define UPLL_SW_MASK 0x0003FFFF
# define VCLK_SRC_SEL(x) ((x) << 20)
# define VCLK_SRC_SEL_MASK 0x01F00000
# define DCLK_SRC_SEL(x) ((x) << 25)
# define DCLK_SRC_SEL_MASK 0x3E000000
/*
* PM4

View File

@ -60,12 +60,13 @@
* are considered as fatal)
*/
#include <asm/atomic.h>
#include <linux/atomic.h>
#include <linux/wait.h>
#include <linux/list.h>
#include <linux/kref.h>
#include <linux/interval_tree.h>
#include <asm/div64.h>
#include <linux/fence.h>
#include <ttm/ttm_bo_api.h>
#include <ttm/ttm_bo_driver.h>
@ -73,10 +74,10 @@
//#include <ttm/ttm_module.h>
#include <ttm/ttm_execbuf_util.h>
#include <linux/irqreturn.h>
#include <pci.h>
#include <drm/drm_gem.h>
#include <errno-base.h>
#include <linux/irqreturn.h>
#include <linux/pci.h>
#include "radeon_family.h"
#include "radeon_mode.h"
@ -154,9 +155,6 @@ static inline u32 ioread32(const volatile void __iomem *addr)
#define RADEONFB_CONN_LIMIT 4
#define RADEON_BIOS_NUM_SCRATCH 8
/* fence seq are set to this number when signaled */
#define RADEON_FENCE_SIGNALED_SEQ 0LL
/* internal ring indices */
/* r1xx+ has gfx CP ring */
#define RADEON_RING_TYPE_GFX_INDEX 0
@ -183,9 +181,6 @@ static inline u32 ioread32(const volatile void __iomem *addr)
/* number of hw syncs before falling back on blocking */
#define RADEON_NUM_SYNCS 4
/* number of hw syncs before falling back on blocking */
#define RADEON_NUM_SYNCS 4
/* hardcode those limit for now */
#define RADEON_VA_IB_OFFSET (1 << 20)
#define RADEON_VA_RESERVED_SIZE (8 << 20)
@ -384,28 +379,33 @@ extern void evergreen_tiling_fields(unsigned tiling_flags, unsigned *bankw,
* Fences.
*/
struct radeon_fence_driver {
struct radeon_device *rdev;
uint32_t scratch_reg;
uint64_t gpu_addr;
volatile uint32_t *cpu_addr;
/* sync_seq is protected by ring emission lock */
uint64_t sync_seq[RADEON_NUM_RINGS];
atomic64_t last_seq;
bool initialized;
bool initialized, delayed_irq;
struct delayed_work lockup_work;
};
struct radeon_fence {
struct fence base;
struct radeon_device *rdev;
struct kref kref;
/* protected by radeon_fence.lock */
uint64_t seq;
/* RB, DMA, etc. */
unsigned ring;
bool is_vm_update;
wait_queue_t fence_wake;
};
int radeon_fence_driver_start_ring(struct radeon_device *rdev, int ring);
int radeon_fence_driver_init(struct radeon_device *rdev);
void radeon_fence_driver_fini(struct radeon_device *rdev);
void radeon_fence_driver_force_completion(struct radeon_device *rdev);
void radeon_fence_driver_force_completion(struct radeon_device *rdev, int ring);
int radeon_fence_emit(struct radeon_device *rdev, struct radeon_fence **fence, int ring);
void radeon_fence_process(struct radeon_device *rdev, int ring);
bool radeon_fence_signaled(struct radeon_fence *fence);
@ -481,12 +481,22 @@ struct radeon_mman {
#endif
};
struct radeon_bo_list {
struct radeon_bo *robj;
struct ttm_validate_buffer tv;
uint64_t gpu_offset;
unsigned prefered_domains;
unsigned allowed_domains;
uint32_t tiling_flags;
};
/* bo virtual address in a specific vm */
struct radeon_bo_va {
/* protected by bo being reserved */
struct list_head bo_list;
uint32_t flags;
uint64_t addr;
struct radeon_fence *last_pt_update;
unsigned ref_count;
/* protected by vm mutex */
@ -503,7 +513,7 @@ struct radeon_bo {
struct list_head list;
/* Protected by tbo.reserved */
u32 initial_domain;
u32 placements[3];
struct ttm_place placements[4];
struct ttm_placement placement;
struct ttm_buffer_object tbo;
struct ttm_bo_kmap_obj kmap;
@ -522,6 +532,8 @@ struct radeon_bo {
struct drm_gem_object gem_base;
pid_t pid;
struct radeon_mn *mn;
};
#define gem_to_radeon_bo(gobj) container_of((gobj), struct radeon_bo, gem_base)
@ -604,7 +616,6 @@ struct radeon_semaphore {
struct radeon_sa_bo *sa_bo;
signed waiters;
uint64_t gpu_addr;
struct radeon_fence *sync_to[RADEON_NUM_RINGS];
};
int radeon_semaphore_create(struct radeon_device *rdev,
@ -613,15 +624,32 @@ bool radeon_semaphore_emit_signal(struct radeon_device *rdev, int ring,
struct radeon_semaphore *semaphore);
bool radeon_semaphore_emit_wait(struct radeon_device *rdev, int ring,
struct radeon_semaphore *semaphore);
void radeon_semaphore_sync_to(struct radeon_semaphore *semaphore,
struct radeon_fence *fence);
int radeon_semaphore_sync_rings(struct radeon_device *rdev,
struct radeon_semaphore *semaphore,
int waiting_ring);
void radeon_semaphore_free(struct radeon_device *rdev,
struct radeon_semaphore **semaphore,
struct radeon_fence *fence);
/*
* Synchronization
*/
struct radeon_sync {
struct radeon_semaphore *semaphores[RADEON_NUM_SYNCS];
struct radeon_fence *sync_to[RADEON_NUM_RINGS];
struct radeon_fence *last_vm_update;
};
void radeon_sync_create(struct radeon_sync *sync);
void radeon_sync_fence(struct radeon_sync *sync,
struct radeon_fence *fence);
int radeon_sync_resv(struct radeon_device *rdev,
struct radeon_sync *sync,
struct reservation_object *resv,
bool shared);
int radeon_sync_rings(struct radeon_device *rdev,
struct radeon_sync *sync,
int waiting_ring);
void radeon_sync_free(struct radeon_device *rdev, struct radeon_sync *sync,
struct radeon_fence *fence);
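The radeon_sync object replaces the bare semaphore/sync_to plumbing. Its typical lifecycle, mirroring the copy paths earlier in this patch (error handling elided for brevity):

struct radeon_sync sync;
int r;

radeon_sync_create(&sync);
r = radeon_sync_resv(rdev, &sync, resv, false); /* collect fences from the resv object */
r = radeon_sync_rings(rdev, &sync, ring->idx);  /* emit semaphore waits on this ring */
/* ... emit the actual work and a fence ... */
radeon_sync_free(rdev, &sync, fence);           /* fence keeps the semaphores alive */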
/*
* GART structures, functions & helpers
*/
@ -722,6 +750,10 @@ struct radeon_doorbell {
int radeon_doorbell_get(struct radeon_device *rdev, u32 *page);
void radeon_doorbell_free(struct radeon_device *rdev, u32 doorbell);
void radeon_doorbell_get_kfd_info(struct radeon_device *rdev,
phys_addr_t *aperture_base,
size_t *aperture_size,
size_t *start_offset);
/*
* IRQS.
@ -801,6 +833,7 @@ struct radeon_irq {
int radeon_irq_kms_init(struct radeon_device *rdev);
void radeon_irq_kms_fini(struct radeon_device *rdev);
void radeon_irq_kms_sw_irq_get(struct radeon_device *rdev, int ring);
bool radeon_irq_kms_sw_irq_get_delayed(struct radeon_device *rdev, int ring);
void radeon_irq_kms_sw_irq_put(struct radeon_device *rdev, int ring);
void radeon_irq_kms_pflip_irq_get(struct radeon_device *rdev, int crtc);
void radeon_irq_kms_pflip_irq_put(struct radeon_device *rdev, int crtc);
@ -822,7 +855,7 @@ struct radeon_ib {
struct radeon_fence *fence;
struct radeon_vm *vm;
bool is_const_ib;
struct radeon_semaphore *semaphore;
struct radeon_sync sync;
};
struct radeon_ring {
@ -899,9 +932,22 @@ struct radeon_vm_pt {
uint64_t addr;
};
struct radeon_vm_id {
unsigned id;
uint64_t pd_gpu_addr;
/* last flushed PD/PT update */
struct radeon_fence *flushed_updates;
/* last use of vmid */
struct radeon_fence *last_id_use;
};
struct radeon_vm {
struct mutex mutex;
struct rb_root va;
unsigned id;
/* protecting invalidated and freed */
spinlock_t status_lock;
/* BOs moved, but not yet updated in the PT */
struct list_head invalidated;
@ -911,7 +957,6 @@ struct radeon_vm {
/* contains the page directory */
struct radeon_bo *page_directory;
uint64_t pd_gpu_addr;
unsigned max_pde_used;
/* array of page tables, one for each page directory entry */
@ -919,13 +964,8 @@ struct radeon_vm {
struct radeon_bo_va *ib_bo_va;
struct mutex mutex;
/* last fence for cs using this vm */
struct radeon_fence *fence;
/* last flush or NULL if we still need to flush */
struct radeon_fence *last_flush;
/* last use of vmid */
struct radeon_fence *last_id_use;
/* for id and flush management per ring */
struct radeon_vm_id ids[RADEON_NUM_RINGS];
};
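VM id and flush state thus move from a single per-VM copy into ids[RADEON_NUM_RINGS], so each ring tracks its own last-flushed page directory. A hedged sketch of the flush-avoidance test this enables (modeled on what radeon_vm_flush is assumed to do; field names are from the struct above):

struct radeon_vm_id *vm_id = &vm->ids[ring->idx];

/* only flush when this ring's view of the page directory is stale */
if (pd_addr != vm_id->pd_gpu_addr)
	radeon_ring_vm_flush(rdev, ring, vm_id->id, pd_addr);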
struct radeon_vm_manager {
@ -1033,19 +1073,7 @@ void cayman_dma_fini(struct radeon_device *rdev);
/*
* CS.
*/
struct radeon_cs_reloc {
struct drm_gem_object *gobj;
struct radeon_bo *robj;
struct ttm_validate_buffer tv;
uint64_t gpu_offset;
unsigned prefered_domains;
unsigned allowed_domains;
uint32_t tiling_flags;
uint32_t handle;
};
struct radeon_cs_chunk {
uint32_t chunk_id;
uint32_t length_dw;
uint32_t *kdata;
void __user *user_ptr;
@ -1063,16 +1091,15 @@ struct radeon_cs_parser {
unsigned idx;
/* relocations */
unsigned nrelocs;
struct radeon_cs_reloc *relocs;
struct radeon_cs_reloc **relocs_ptr;
struct radeon_cs_reloc *vm_bos;
struct radeon_bo_list *relocs;
struct radeon_bo_list *vm_bos;
struct list_head validated;
unsigned dma_reloc_idx;
/* indices of various chunks */
int chunk_ib_idx;
int chunk_relocs_idx;
int chunk_flags_idx;
int chunk_const_ib_idx;
struct radeon_cs_chunk *chunk_ib;
struct radeon_cs_chunk *chunk_relocs;
struct radeon_cs_chunk *chunk_flags;
struct radeon_cs_chunk *chunk_const_ib;
struct radeon_ib ib;
struct radeon_ib const_ib;
void *track;
@ -1086,7 +1113,7 @@ struct radeon_cs_parser {
static inline u32 radeon_get_ib_value(struct radeon_cs_parser *p, int idx)
{
struct radeon_cs_chunk *ibc = &p->chunks[p->chunk_ib_idx];
struct radeon_cs_chunk *ibc = p->chunk_ib;
if (ibc->kdata)
return ibc->kdata[idx];
@ -1498,6 +1525,10 @@ struct radeon_dpm_fan {
u8 t_hyst;
u32 cycle_delay;
u16 t_max;
u8 control_mode;
u16 default_max_fan_pwm;
u16 default_fan_output_sensitivity;
u16 fan_output_sensitivity;
bool ucode_fan_control;
};
@ -1631,6 +1662,11 @@ struct radeon_pm {
/* internal thermal controller on rv6xx+ */
enum radeon_int_thermal_type int_thermal_type;
struct device *int_hwmon_dev;
/* fan control parameters */
bool no_fan;
u8 fan_pulses_per_revolution;
u8 fan_min_rpm;
u8 fan_max_rpm;
/* dpm */
bool dpm_enabled;
struct radeon_dpm dpm;
@ -1665,7 +1701,8 @@ int radeon_uvd_get_create_msg(struct radeon_device *rdev, int ring,
uint32_t handle, struct radeon_fence **fence);
int radeon_uvd_get_destroy_msg(struct radeon_device *rdev, int ring,
uint32_t handle, struct radeon_fence **fence);
void radeon_uvd_force_into_uvd_segment(struct radeon_bo *rbo);
void radeon_uvd_force_into_uvd_segment(struct radeon_bo *rbo,
uint32_t allowed_domains);
void radeon_uvd_free_handles(struct radeon_device *rdev,
struct drm_file *filp);
int radeon_uvd_cs_parse(struct radeon_cs_parser *parser);
@ -1754,6 +1791,11 @@ void radeon_test_ring_sync(struct radeon_device *rdev,
struct radeon_ring *cpB);
void radeon_test_syncing(struct radeon_device *rdev);
/*
* MMU Notifier
*/
int radeon_mn_register(struct radeon_bo *bo, unsigned long addr);
void radeon_mn_unregister(struct radeon_bo *bo);
/*
* Debugfs
@ -1787,7 +1829,8 @@ struct radeon_asic_ring {
void (*hdp_flush)(struct radeon_device *rdev, struct radeon_ring *ring);
bool (*emit_semaphore)(struct radeon_device *rdev, struct radeon_ring *cp,
struct radeon_semaphore *semaphore, bool emit_wait);
void (*vm_flush)(struct radeon_device *rdev, int ridx, struct radeon_vm *vm);
void (*vm_flush)(struct radeon_device *rdev, struct radeon_ring *ring,
unsigned vm_id, uint64_t pd_addr);
/* testing functions */
int (*ring_test)(struct radeon_device *rdev, struct radeon_ring *cp);
@ -1868,24 +1911,24 @@ struct radeon_asic {
} display;
/* copy functions for bo handling */
struct {
int (*blit)(struct radeon_device *rdev,
struct radeon_fence *(*blit)(struct radeon_device *rdev,
uint64_t src_offset,
uint64_t dst_offset,
unsigned num_gpu_pages,
struct radeon_fence **fence);
struct reservation_object *resv);
u32 blit_ring_index;
int (*dma)(struct radeon_device *rdev,
struct radeon_fence *(*dma)(struct radeon_device *rdev,
uint64_t src_offset,
uint64_t dst_offset,
unsigned num_gpu_pages,
struct radeon_fence **fence);
struct reservation_object *resv);
u32 dma_ring_index;
/* method used for bo copy */
int (*copy)(struct radeon_device *rdev,
struct radeon_fence *(*copy)(struct radeon_device *rdev,
uint64_t src_offset,
uint64_t dst_offset,
unsigned num_gpu_pages,
struct radeon_fence **fence);
struct reservation_object *resv);
/* ring used for bo copies */
u32 copy_ring_index;
} copy;
@ -2291,6 +2334,7 @@ struct radeon_device {
struct radeon_mman mman;
struct radeon_fence_driver fence_drv[RADEON_NUM_RINGS];
wait_queue_head_t fence_queue;
unsigned fence_context;
struct mutex ring_lock;
struct radeon_ring ring[RADEON_NUM_RINGS];
bool ib_pool_ready;
@ -2309,7 +2353,7 @@ struct radeon_device {
bool need_dma32;
bool accel_working;
bool fastfb_working; /* IGP feature*/
bool needs_reset;
bool needs_reset, in_reset;
struct radeon_surface_reg surface_regs[RADEON_GEM_MAX_SURFACES];
const struct firmware *me_fw; /* all family ME firmware */
const struct firmware *pfp_fw; /* r6/700 PFP firmware */
@ -2330,7 +2374,6 @@ struct radeon_device {
struct radeon_mec mec;
struct work_struct hotplug_work;
struct work_struct audio_work;
struct work_struct reset_work;
int num_crtc; /* number of crtcs */
struct mutex dc_hw_i2c_mutex; /* display controller hw i2c mutex */
bool has_uvd;
@ -2355,6 +2398,8 @@ struct radeon_device {
struct radeon_atcs atcs;
/* srbm instance registers */
struct mutex srbm_mutex;
/* GRBM index mutex. Protects concurrent access to GRBM index */
struct mutex grbm_idx_mutex;
/* clock, powergating flags */
u32 cg_flags;
u32 pg_flags;
@ -2366,6 +2411,7 @@ struct radeon_device {
/* tracking pinned memory */
u64 vram_pin_size;
u64 gart_pin_size;
struct mutex mn_lock;
};
bool radeon_is_px(struct drm_device *dev);
@ -2421,7 +2467,17 @@ void cik_mm_wdoorbell(struct radeon_device *rdev, u32 index, u32 v);
/*
* Cast helper
*/
#define to_radeon_fence(p) ((struct radeon_fence *)(p))
extern const struct fence_ops radeon_fence_ops;
static inline struct radeon_fence *to_radeon_fence(struct fence *f)
{
struct radeon_fence *__f = container_of(f, struct radeon_fence, base);
if (__f->base.ops == &radeon_fence_ops)
return __f;
return NULL;
}
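The NULL return matters once fences from other drivers appear in shared reservation objects: callers probe with to_radeon_fence() and fall back to the generic fence API when the cast fails. A minimal sketch of that pattern (hypothetical caller, not part of this patch):
static int radeon_wait_maybe_foreign(struct fence *f)
{
	struct radeon_fence *rfence = to_radeon_fence(f);

	/* native radeon fence: use the driver's own wait */
	if (rfence)
		return radeon_fence_wait(rfence, false);

	/* foreign fence: only the generic cross-driver wait is safe */
	return fence_wait(f, false);
}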
/*
* Registers read & write functions.
@ -2741,18 +2797,25 @@ void radeon_atombios_fini(struct radeon_device *rdev);
/*
* RING helpers.
*/
#if DRM_DEBUG_CODE == 0
/**
* radeon_ring_write - write a value to the ring
*
* @ring: radeon_ring structure holding ring information
* @v: dword (dw) value to write
*
* Write a value to the requested ring buffer (all asics).
*/
static inline void radeon_ring_write(struct radeon_ring *ring, uint32_t v)
{
if (ring->count_dw <= 0)
DRM_ERROR("radeon: writing more dwords to the ring than expected!\n");
ring->ring[ring->wptr++] = v;
ring->wptr &= ring->ptr_mask;
ring->count_dw--;
ring->ring_free_dw--;
}
#else
/* With debugging this is just too big to inline */
void radeon_ring_write(struct radeon_ring *ring, uint32_t v);
#endif
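radeon_ring_write() only complains once count_dw underflows; reserving the space is the caller's job. A sketch of the usual reserve/write/commit sequence (assuming the 3.19-era radeon_ring_lock()/radeon_ring_unlock_commit() signatures; the payload dwords are placeholders):
static int radeon_emit_example(struct radeon_device *rdev,
			       struct radeon_ring *ring)
{
	int r;

	r = radeon_ring_lock(rdev, ring, 2);	/* reserve 2 dwords */
	if (r)
		return r;

	radeon_ring_write(ring, 0xdeadbeef);	/* placeholder payload */
	radeon_ring_write(ring, 0xcafebabe);

	radeon_ring_unlock_commit(rdev, ring, false); /* bump wptr, no HDP flush */
	return 0;
}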
/*
* ASICs macro.
@ -2778,7 +2841,7 @@ void radeon_ring_write(struct radeon_ring *ring, uint32_t v);
#define radeon_ring_ib_execute(rdev, r, ib) (rdev)->asic->ring[(r)]->ib_execute((rdev), (ib))
#define radeon_ring_ib_parse(rdev, r, ib) (rdev)->asic->ring[(r)]->ib_parse((rdev), (ib))
#define radeon_ring_is_lockup(rdev, r, cp) (rdev)->asic->ring[(r)]->is_lockup((rdev), (cp))
#define radeon_ring_vm_flush(rdev, r, vm) (rdev)->asic->ring[(r)]->vm_flush((rdev), (r), (vm))
#define radeon_ring_vm_flush(rdev, r, vm_id, pd_addr) (rdev)->asic->ring[(r)->idx]->vm_flush((rdev), (r), (vm_id), (pd_addr))
#define radeon_ring_get_rptr(rdev, r) (rdev)->asic->ring[(r)->idx]->get_rptr((rdev), (r))
#define radeon_ring_get_wptr(rdev, r) (rdev)->asic->ring[(r)->idx]->get_wptr((rdev), (r))
#define radeon_ring_set_wptr(rdev, r) (rdev)->asic->ring[(r)->idx]->set_wptr((rdev), (r))
@ -2791,9 +2854,9 @@ void radeon_ring_write(struct radeon_ring *ring, uint32_t v);
#define radeon_hdmi_setmode(rdev, e, m) (rdev)->asic->display.hdmi_setmode((e), (m))
#define radeon_fence_ring_emit(rdev, r, fence) (rdev)->asic->ring[(r)]->emit_fence((rdev), (fence))
#define radeon_semaphore_ring_emit(rdev, r, cp, semaphore, emit_wait) (rdev)->asic->ring[(r)]->emit_semaphore((rdev), (cp), (semaphore), (emit_wait))
#define radeon_copy_blit(rdev, s, d, np, f) (rdev)->asic->copy.blit((rdev), (s), (d), (np), (f))
#define radeon_copy_dma(rdev, s, d, np, f) (rdev)->asic->copy.dma((rdev), (s), (d), (np), (f))
#define radeon_copy(rdev, s, d, np, f) (rdev)->asic->copy.copy((rdev), (s), (d), (np), (f))
#define radeon_copy_blit(rdev, s, d, np, resv) (rdev)->asic->copy.blit((rdev), (s), (d), (np), (resv))
#define radeon_copy_dma(rdev, s, d, np, resv) (rdev)->asic->copy.dma((rdev), (s), (d), (np), (resv))
#define radeon_copy(rdev, s, d, np, resv) (rdev)->asic->copy.copy((rdev), (s), (d), (np), (resv))
#define radeon_copy_blit_ring_index(rdev) (rdev)->asic->copy.blit_ring_index
#define radeon_copy_dma_ring_index(rdev) (rdev)->asic->copy.dma_ring_index
#define radeon_copy_ring_index(rdev) (rdev)->asic->copy.copy_ring_index
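The copy entry points now return the fence directly and take the buffer's reservation object, so error handling moves to IS_ERR()/PTR_ERR(). A sketch of the new calling convention (helper name made up, buffer setup assumed):
static int radeon_copy_example(struct radeon_device *rdev,
			       uint64_t src, uint64_t dst, unsigned npages,
			       struct reservation_object *resv)
{
	struct radeon_fence *fence;
	int r;

	fence = radeon_copy_dma(rdev, src, dst, npages, resv);
	if (IS_ERR(fence))
		return PTR_ERR(fence);	/* no fence was emitted */

	r = radeon_fence_wait(fence, false);
	radeon_fence_unref(&fence);
	return r;
}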
@ -2867,6 +2930,10 @@ extern void radeon_legacy_set_clock_gating(struct radeon_device *rdev, int enabl
extern void radeon_atom_set_clock_gating(struct radeon_device *rdev, int enable);
extern void radeon_ttm_placement_from_domain(struct radeon_bo *rbo, u32 domain);
extern bool radeon_ttm_bo_is_radeon_bo(struct ttm_buffer_object *bo);
extern int radeon_ttm_tt_set_userptr(struct ttm_tt *ttm, uint64_t addr,
uint32_t flags);
extern bool radeon_ttm_tt_has_userptr(struct ttm_tt *ttm);
extern bool radeon_ttm_tt_is_readonly(struct ttm_tt *ttm);
extern void radeon_vram_location(struct radeon_device *rdev, struct radeon_mc *mc, u64 base);
extern void radeon_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc);
extern int radeon_resume_kms(struct drm_device *dev, bool resume, bool fbcon);
@ -2883,14 +2950,14 @@ int radeon_vm_manager_init(struct radeon_device *rdev);
void radeon_vm_manager_fini(struct radeon_device *rdev);
int radeon_vm_init(struct radeon_device *rdev, struct radeon_vm *vm);
void radeon_vm_fini(struct radeon_device *rdev, struct radeon_vm *vm);
struct radeon_cs_reloc *radeon_vm_get_bos(struct radeon_device *rdev,
struct radeon_bo_list *radeon_vm_get_bos(struct radeon_device *rdev,
struct radeon_vm *vm,
struct list_head *head);
struct radeon_fence *radeon_vm_grab_id(struct radeon_device *rdev,
struct radeon_vm *vm, int ring);
void radeon_vm_flush(struct radeon_device *rdev,
struct radeon_vm *vm,
int ring);
int ring, struct radeon_fence *fence);
void radeon_vm_fence(struct radeon_device *rdev,
struct radeon_vm *vm,
struct radeon_fence *fence);
@ -2924,10 +2991,10 @@ struct r600_audio_pin *r600_audio_get_pin(struct radeon_device *rdev);
struct r600_audio_pin *dce6_audio_get_pin(struct radeon_device *rdev);
void r600_audio_enable(struct radeon_device *rdev,
struct r600_audio_pin *pin,
bool enable);
u8 enable_mask);
void dce6_audio_enable(struct radeon_device *rdev,
struct r600_audio_pin *pin,
bool enable);
u8 enable_mask);
/*
* R600 vram scratch functions
@ -2997,7 +3064,7 @@ bool radeon_cs_packet_next_is_pkt3_nop(struct radeon_cs_parser *p);
void radeon_cs_dump_packet(struct radeon_cs_parser *p,
struct radeon_cs_packet *pkt);
int radeon_cs_packet_next_reloc(struct radeon_cs_parser *p,
struct radeon_cs_reloc **cs_reloc,
struct radeon_bo_list **cs_reloc,
int nomm);
int r600_cs_common_vline_parse(struct radeon_cs_parser *p,
uint32_t *vline_start_end,
@ -3005,7 +3072,7 @@ int r600_cs_common_vline_parse(struct radeon_cs_parser *p,
#include "radeon_object.h"
#define DRM_UDELAY(d) udelay(d)
#define PCI_DEVICE_ID_ATI_RADEON_QY 0x5159
resource_size_t
drm_get_resource_start(struct drm_device *dev, unsigned int resource);

View File

@ -2294,6 +2294,14 @@ int radeon_asic_init(struct radeon_device *rdev)
case CHIP_RS780:
case CHIP_RS880:
rdev->asic = &rs780_asic;
/* 760G/780V/880V don't have UVD */
if ((rdev->pdev->device == 0x9616)||
(rdev->pdev->device == 0x9611)||
(rdev->pdev->device == 0x9613)||
(rdev->pdev->device == 0x9711)||
(rdev->pdev->device == 0x9713))
rdev->has_uvd = false;
else
rdev->has_uvd = true;
break;
case CHIP_RV770:

View File

@ -81,11 +81,11 @@ bool r100_semaphore_ring_emit(struct radeon_device *rdev,
int r100_cs_parse(struct radeon_cs_parser *p);
void r100_pll_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v);
uint32_t r100_pll_rreg(struct radeon_device *rdev, uint32_t reg);
int r100_copy_blit(struct radeon_device *rdev,
struct radeon_fence *r100_copy_blit(struct radeon_device *rdev,
uint64_t src_offset,
uint64_t dst_offset,
unsigned num_gpu_pages,
struct radeon_fence **fence);
struct reservation_object *resv);
int r100_set_surface_reg(struct radeon_device *rdev, int reg,
uint32_t tiling_flags, uint32_t pitch,
uint32_t offset, uint32_t obj_size);
@ -152,11 +152,11 @@ void r100_gfx_set_wptr(struct radeon_device *rdev,
/*
* r200,rv250,rs300,rv280
*/
extern int r200_copy_dma(struct radeon_device *rdev,
struct radeon_fence *r200_copy_dma(struct radeon_device *rdev,
uint64_t src_offset,
uint64_t dst_offset,
unsigned num_gpu_pages,
struct radeon_fence **fence);
struct reservation_object *resv);
void r200_set_safe_registers(struct radeon_device *rdev);
/*
@ -340,12 +340,14 @@ int r600_dma_ib_test(struct radeon_device *rdev, struct radeon_ring *ring);
void r600_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib);
int r600_ring_test(struct radeon_device *rdev, struct radeon_ring *cp);
int r600_dma_ring_test(struct radeon_device *rdev, struct radeon_ring *cp);
int r600_copy_cpdma(struct radeon_device *rdev,
struct radeon_fence *r600_copy_cpdma(struct radeon_device *rdev,
uint64_t src_offset, uint64_t dst_offset,
unsigned num_gpu_pages, struct radeon_fence **fence);
int r600_copy_dma(struct radeon_device *rdev,
unsigned num_gpu_pages,
struct reservation_object *resv);
struct radeon_fence *r600_copy_dma(struct radeon_device *rdev,
uint64_t src_offset, uint64_t dst_offset,
unsigned num_gpu_pages, struct radeon_fence **fence);
unsigned num_gpu_pages,
struct reservation_object *resv);
void r600_hpd_init(struct radeon_device *rdev);
void r600_hpd_fini(struct radeon_device *rdev);
bool r600_hpd_sense(struct radeon_device *rdev, enum radeon_hpd_id hpd);
@ -389,7 +391,6 @@ void r600_disable_interrupts(struct radeon_device *rdev);
void r600_rlc_stop(struct radeon_device *rdev);
/* r600 audio */
int r600_audio_init(struct radeon_device *rdev);
struct r600_audio_pin r600_audio_status(struct radeon_device *rdev);
void r600_audio_fini(struct radeon_device *rdev);
void r600_audio_set_dto(struct drm_encoder *encoder, u32 clock);
void r600_hdmi_update_avi_infoframe(struct drm_encoder *encoder, void *buffer,
@ -461,10 +462,10 @@ bool rv770_page_flip_pending(struct radeon_device *rdev, int crtc);
void r700_vram_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc);
void r700_cp_stop(struct radeon_device *rdev);
void r700_cp_fini(struct radeon_device *rdev);
int rv770_copy_dma(struct radeon_device *rdev,
struct radeon_fence *rv770_copy_dma(struct radeon_device *rdev,
uint64_t src_offset, uint64_t dst_offset,
unsigned num_gpu_pages,
struct radeon_fence **fence);
struct reservation_object *resv);
u32 rv770_get_xclk(struct radeon_device *rdev);
int rv770_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk);
int rv770_get_temp(struct radeon_device *rdev);
@ -535,10 +536,10 @@ void evergreen_dma_fence_ring_emit(struct radeon_device *rdev,
struct radeon_fence *fence);
void evergreen_dma_ring_ib_execute(struct radeon_device *rdev,
struct radeon_ib *ib);
int evergreen_copy_dma(struct radeon_device *rdev,
struct radeon_fence *evergreen_copy_dma(struct radeon_device *rdev,
uint64_t src_offset, uint64_t dst_offset,
unsigned num_gpu_pages,
struct radeon_fence **fence);
struct reservation_object *resv);
void evergreen_hdmi_enable(struct drm_encoder *encoder, bool enable);
void evergreen_hdmi_setmode(struct drm_encoder *encoder, struct drm_display_mode *mode);
int evergreen_get_temp(struct radeon_device *rdev);
@ -598,7 +599,8 @@ int cayman_asic_reset(struct radeon_device *rdev);
void cayman_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib);
int cayman_vm_init(struct radeon_device *rdev);
void cayman_vm_fini(struct radeon_device *rdev);
void cayman_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm);
void cayman_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring,
unsigned vm_id, uint64_t pd_addr);
uint32_t cayman_vm_page_flags(struct radeon_device *rdev, uint32_t flags);
int evergreen_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib);
int evergreen_dma_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib);
@ -623,7 +625,8 @@ void cayman_dma_vm_set_pages(struct radeon_device *rdev,
uint32_t incr, uint32_t flags);
void cayman_dma_vm_pad_ib(struct radeon_ib *ib);
void cayman_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm);
void cayman_dma_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring,
unsigned vm_id, uint64_t pd_addr);
u32 cayman_gfx_get_rptr(struct radeon_device *rdev,
struct radeon_ring *ring);
@ -698,12 +701,13 @@ int si_irq_set(struct radeon_device *rdev);
int si_irq_process(struct radeon_device *rdev);
int si_vm_init(struct radeon_device *rdev);
void si_vm_fini(struct radeon_device *rdev);
void si_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm);
void si_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring,
unsigned vm_id, uint64_t pd_addr);
int si_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib);
int si_copy_dma(struct radeon_device *rdev,
struct radeon_fence *si_copy_dma(struct radeon_device *rdev,
uint64_t src_offset, uint64_t dst_offset,
unsigned num_gpu_pages,
struct radeon_fence **fence);
struct reservation_object *resv);
void si_dma_vm_copy_pages(struct radeon_device *rdev,
struct radeon_ib *ib,
@ -720,7 +724,8 @@ void si_dma_vm_set_pages(struct radeon_device *rdev,
uint64_t addr, unsigned count,
uint32_t incr, uint32_t flags);
void si_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm);
void si_dma_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring,
unsigned vm_id, uint64_t pd_addr);
u32 si_get_xclk(struct radeon_device *rdev);
uint64_t si_get_gpu_clock_counter(struct radeon_device *rdev);
int si_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk);
@ -759,14 +764,14 @@ bool cik_sdma_semaphore_ring_emit(struct radeon_device *rdev,
struct radeon_semaphore *semaphore,
bool emit_wait);
void cik_sdma_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib);
int cik_copy_dma(struct radeon_device *rdev,
struct radeon_fence *cik_copy_dma(struct radeon_device *rdev,
uint64_t src_offset, uint64_t dst_offset,
unsigned num_gpu_pages,
struct radeon_fence **fence);
int cik_copy_cpdma(struct radeon_device *rdev,
struct reservation_object *resv);
struct radeon_fence *cik_copy_cpdma(struct radeon_device *rdev,
uint64_t src_offset, uint64_t dst_offset,
unsigned num_gpu_pages,
struct radeon_fence **fence);
struct reservation_object *resv);
int cik_sdma_ring_test(struct radeon_device *rdev, struct radeon_ring *ring);
int cik_sdma_ib_test(struct radeon_device *rdev, struct radeon_ring *ring);
bool cik_sdma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring);
@ -792,7 +797,8 @@ int cik_irq_set(struct radeon_device *rdev);
int cik_irq_process(struct radeon_device *rdev);
int cik_vm_init(struct radeon_device *rdev);
void cik_vm_fini(struct radeon_device *rdev);
void cik_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm);
void cik_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring,
unsigned vm_id, uint64_t pd_addr);
void cik_sdma_vm_copy_pages(struct radeon_device *rdev,
struct radeon_ib *ib,
@ -810,7 +816,8 @@ void cik_sdma_vm_set_pages(struct radeon_device *rdev,
uint32_t incr, uint32_t flags);
void cik_sdma_vm_pad_ib(struct radeon_ib *ib);
void cik_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm);
void cik_dma_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring,
unsigned vm_id, uint64_t pd_addr);
int cik_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib);
u32 cik_gfx_get_rptr(struct radeon_device *rdev,
struct radeon_ring *ring);
@ -882,6 +889,7 @@ uint32_t uvd_v1_0_get_wptr(struct radeon_device *rdev,
struct radeon_ring *ring);
void uvd_v1_0_set_wptr(struct radeon_device *rdev,
struct radeon_ring *ring);
int uvd_v1_0_resume(struct radeon_device *rdev);
int uvd_v1_0_init(struct radeon_device *rdev);
void uvd_v1_0_fini(struct radeon_device *rdev);
@ -889,6 +897,8 @@ int uvd_v1_0_start(struct radeon_device *rdev);
void uvd_v1_0_stop(struct radeon_device *rdev);
int uvd_v1_0_ring_test(struct radeon_device *rdev, struct radeon_ring *ring);
void uvd_v1_0_fence_emit(struct radeon_device *rdev,
struct radeon_fence *fence);
int uvd_v1_0_ib_test(struct radeon_device *rdev, struct radeon_ring *ring);
bool uvd_v1_0_semaphore_emit(struct radeon_device *rdev,
struct radeon_ring *ring,

View File

@ -196,7 +196,7 @@ void radeon_atombios_i2c_init(struct radeon_device *rdev)
}
}
static struct radeon_gpio_rec radeon_lookup_gpio(struct radeon_device *rdev,
struct radeon_gpio_rec radeon_atombios_lookup_gpio(struct radeon_device *rdev,
u8 id)
{
struct atom_context *ctx = rdev->mode_info.atom_context;
@ -221,6 +221,7 @@ static struct radeon_gpio_rec radeon_lookup_gpio(struct radeon_device *rdev,
if (id == pin->ucGPIO_ID) {
gpio.id = pin->ucGPIO_ID;
gpio.reg = le16_to_cpu(pin->usGpioPin_AIndex) * 4;
gpio.shift = pin->ucGpioPinBitShift;
gpio.mask = (1 << pin->ucGpioPinBitShift);
gpio.valid = true;
break;
@ -458,7 +459,7 @@ static bool radeon_atom_apply_quirks(struct drm_device *dev,
return true;
}
const int supported_devices_connector_convert[] = {
static const int supported_devices_connector_convert[] = {
DRM_MODE_CONNECTOR_Unknown,
DRM_MODE_CONNECTOR_VGA,
DRM_MODE_CONNECTOR_DVII,
@ -477,7 +478,7 @@ const int supported_devices_connector_convert[] = {
DRM_MODE_CONNECTOR_DisplayPort
};
const uint16_t supported_devices_connector_object_id_convert[] = {
static const uint16_t supported_devices_connector_object_id_convert[] = {
CONNECTOR_OBJECT_ID_NONE,
CONNECTOR_OBJECT_ID_VGA,
CONNECTOR_OBJECT_ID_DUAL_LINK_DVI_I, /* not all boards support DL */
@ -494,7 +495,7 @@ const uint16_t supported_devices_connector_object_id_convert[] = {
CONNECTOR_OBJECT_ID_SVIDEO
};
const int object_connector_convert[] = {
static const int object_connector_convert[] = {
DRM_MODE_CONNECTOR_Unknown,
DRM_MODE_CONNECTOR_DVII,
DRM_MODE_CONNECTOR_DVII,
@ -801,7 +802,7 @@ bool radeon_get_atom_connector_info_from_object_table(struct drm_device *dev)
hpd_record =
(ATOM_HPD_INT_RECORD *)
record;
gpio = radeon_lookup_gpio(rdev,
gpio = radeon_atombios_lookup_gpio(rdev,
hpd_record->ucHPDIntGPIOID);
hpd = radeon_atom_get_hpd_info_from_gpio(rdev, &gpio);
hpd.plugged_state = hpd_record->ucPlugged_PinState;
@ -2128,7 +2129,7 @@ static int radeon_atombios_parse_power_table_1_3(struct radeon_device *rdev)
rdev->pm.power_state[state_index].clock_info[0].voltage.type =
VOLTAGE_GPIO;
rdev->pm.power_state[state_index].clock_info[0].voltage.gpio =
radeon_lookup_gpio(rdev,
radeon_atombios_lookup_gpio(rdev,
power_info->info.asPowerPlayInfo[i].ucVoltageDropIndex);
if (misc & ATOM_PM_MISCINFO_VOLTAGE_DROP_ACTIVE_HIGH)
rdev->pm.power_state[state_index].clock_info[0].voltage.active_high =
@ -2164,7 +2165,7 @@ static int radeon_atombios_parse_power_table_1_3(struct radeon_device *rdev)
rdev->pm.power_state[state_index].clock_info[0].voltage.type =
VOLTAGE_GPIO;
rdev->pm.power_state[state_index].clock_info[0].voltage.gpio =
radeon_lookup_gpio(rdev,
radeon_atombios_lookup_gpio(rdev,
power_info->info_2.asPowerPlayInfo[i].ucVoltageDropIndex);
if (misc & ATOM_PM_MISCINFO_VOLTAGE_DROP_ACTIVE_HIGH)
rdev->pm.power_state[state_index].clock_info[0].voltage.active_high =
@ -2200,7 +2201,7 @@ static int radeon_atombios_parse_power_table_1_3(struct radeon_device *rdev)
rdev->pm.power_state[state_index].clock_info[0].voltage.type =
VOLTAGE_GPIO;
rdev->pm.power_state[state_index].clock_info[0].voltage.gpio =
radeon_lookup_gpio(rdev,
radeon_atombios_lookup_gpio(rdev,
power_info->info_3.asPowerPlayInfo[i].ucVoltageDropIndex);
if (misc & ATOM_PM_MISCINFO_VOLTAGE_DROP_ACTIVE_HIGH)
rdev->pm.power_state[state_index].clock_info[0].voltage.active_high =
@ -2248,6 +2249,14 @@ static void radeon_atombios_add_pplib_thermal_controller(struct radeon_device *r
/* add the i2c bus for thermal/fan chip */
if (controller->ucType > 0) {
if (controller->ucFanParameters & ATOM_PP_FANPARAMETERS_NOFAN)
rdev->pm.no_fan = true;
rdev->pm.fan_pulses_per_revolution =
controller->ucFanParameters & ATOM_PP_FANPARAMETERS_TACHOMETER_PULSES_PER_REVOLUTION_MASK;
if (rdev->pm.fan_pulses_per_revolution) {
rdev->pm.fan_min_rpm = controller->ucFanMinRPM;
rdev->pm.fan_max_rpm = controller->ucFanMaxRPM;
}
if (controller->ucType == ATOM_PP_THERMALCONTROLLER_RV6xx) {
DRM_INFO("Internal thermal controller %s fan control\n",
(controller->ucFanParameters &

View File

@ -45,33 +45,29 @@ static int radeon_benchmark_do_move(struct radeon_device *rdev, unsigned size,
for (i = 0; i < n; i++) {
switch (flag) {
case RADEON_BENCHMARK_COPY_DMA:
r = radeon_copy_dma(rdev, saddr, daddr,
fence = radeon_copy_dma(rdev, saddr, daddr,
size / RADEON_GPU_PAGE_SIZE,
&fence);
NULL);
break;
case RADEON_BENCHMARK_COPY_BLIT:
r = radeon_copy_blit(rdev, saddr, daddr,
fence = radeon_copy_blit(rdev, saddr, daddr,
size / RADEON_GPU_PAGE_SIZE,
&fence);
NULL);
break;
default:
DRM_ERROR("Unknown copy method\n");
r = -EINVAL;
return -EINVAL;
}
if (r)
goto exit_do_move;
if (IS_ERR(fence))
return PTR_ERR(fence);
r = radeon_fence_wait(fence, false);
if (r)
goto exit_do_move;
radeon_fence_unref(&fence);
if (r)
return r;
}
end_jiffies = jiffies;
r = jiffies_to_msecs(end_jiffies - start_jiffies);
exit_do_move:
if (fence)
radeon_fence_unref(&fence);
return r;
return jiffies_to_msecs(end_jiffies - start_jiffies);
}
@ -100,7 +96,7 @@ static void radeon_benchmark_move(struct radeon_device *rdev, unsigned size,
ENTER();
n = RADEON_BENCHMARK_ITERATIONS;
r = radeon_bo_create(rdev, size, PAGE_SIZE, true, sdomain, 0, NULL, &sobj);
r = radeon_bo_create(rdev, size, PAGE_SIZE, true, sdomain, 0, NULL, NULL, &sobj);
if (r) {
goto out_cleanup;
}
@ -112,7 +108,7 @@ static void radeon_benchmark_move(struct radeon_device *rdev, unsigned size,
if (r) {
goto out_cleanup;
}
r = radeon_bo_create(rdev, size, PAGE_SIZE, true, ddomain, 0, NULL, &dobj);
r = radeon_bo_create(rdev, size, PAGE_SIZE, true, ddomain, 0, NULL, NULL, &dobj);
if (r) {
goto out_cleanup;
}

View File

@ -116,7 +116,7 @@ enum radeon_combios_connector {
CONNECTOR_UNSUPPORTED_LEGACY
};
const int legacy_connector_convert[] = {
static const int legacy_connector_convert[] = {
DRM_MODE_CONNECTOR_Unknown,
DRM_MODE_CONNECTOR_DVID,
DRM_MODE_CONNECTOR_VGA,

View File

@ -322,6 +322,12 @@ static void radeon_connector_get_edid(struct drm_connector *connector)
}
if (!radeon_connector->edid) {
/* don't fetch the edid from the vbios if ddc fails and runpm is
* enabled, so we report disconnected.
*/
if ((rdev->flags & RADEON_IS_PX) && (radeon_runtime_pm != 0))
return;
if (rdev->is_atom_bios) {
/* some laptops provide a hardcoded edid in rom for LCDs */
if (((connector->connector_type == DRM_MODE_CONNECTOR_LVDS) ||
@ -826,6 +832,8 @@ static int radeon_lvds_mode_valid(struct drm_connector *connector,
static enum drm_connector_status
radeon_lvds_detect(struct drm_connector *connector, bool force)
{
struct drm_device *dev = connector->dev;
struct radeon_device *rdev = dev->dev_private;
struct radeon_connector *radeon_connector = to_radeon_connector(connector);
struct drm_encoder *encoder = radeon_best_single_encoder(connector);
enum drm_connector_status ret = connector_status_disconnected;

View File

@ -84,21 +84,18 @@ static int radeon_cs_parser_relocs(struct radeon_cs_parser *p)
struct drm_device *ddev = p->rdev->ddev;
struct radeon_cs_chunk *chunk;
struct radeon_cs_buckets buckets;
unsigned i, j;
bool duplicate;
unsigned i;
bool need_mmap_lock = false;
int r;
if (p->chunk_relocs_idx == -1) {
if (p->chunk_relocs == NULL) {
return 0;
}
chunk = &p->chunks[p->chunk_relocs_idx];
chunk = p->chunk_relocs;
p->dma_reloc_idx = 0;
/* FIXME: we assume that each reloc uses 4 dwords */
p->nrelocs = chunk->length_dw / 4;
p->relocs_ptr = kcalloc(p->nrelocs, sizeof(void *), GFP_KERNEL);
if (p->relocs_ptr == NULL) {
return -ENOMEM;
}
p->relocs = kcalloc(p->nrelocs, sizeof(struct radeon_cs_reloc), GFP_KERNEL);
p->relocs = kcalloc(p->nrelocs, sizeof(struct radeon_bo_list), GFP_KERNEL);
if (p->relocs == NULL) {
return -ENOMEM;
}
@ -107,31 +104,17 @@ static int radeon_cs_parser_relocs(struct radeon_cs_parser *p)
for (i = 0; i < p->nrelocs; i++) {
struct drm_radeon_cs_reloc *r;
struct drm_gem_object *gobj;
unsigned priority;
duplicate = false;
r = (struct drm_radeon_cs_reloc *)&chunk->kdata[i*4];
for (j = 0; j < i; j++) {
if (r->handle == p->relocs[j].handle) {
p->relocs_ptr[i] = &p->relocs[j];
duplicate = true;
break;
}
}
if (duplicate) {
p->relocs[i].handle = 0;
continue;
}
p->relocs[i].gobj = drm_gem_object_lookup(ddev, p->filp,
r->handle);
if (p->relocs[i].gobj == NULL) {
gobj = drm_gem_object_lookup(ddev, p->filp, r->handle);
if (gobj == NULL) {
DRM_ERROR("gem object lookup failed 0x%x\n",
r->handle);
return -ENOENT;
}
p->relocs_ptr[i] = &p->relocs[i];
p->relocs[i].robj = gem_to_radeon_bo(p->relocs[i].gobj);
p->relocs[i].robj = gem_to_radeon_bo(gobj);
/* The userspace buffer priorities are from 0 to 15. A higher
* number means the buffer is more important.
@ -143,10 +126,13 @@ static int radeon_cs_parser_relocs(struct radeon_cs_parser *p)
+ !!r->write_domain;
/* the first reloc of an UVD job is the msg and that must be in
VRAM, also put everything into VRAM on AGP cards to avoid
image corruption */
VRAM, also put everything into VRAM on AGP cards and older
IGP chips to avoid image corruption */
if (p->ring == R600_RING_TYPE_UVD_INDEX &&
(i == 0 || drm_pci_device_is_agp(p->rdev->ddev))) {
(i == 0 || drm_pci_device_is_agp(p->rdev->ddev) ||
p->rdev->family == CHIP_RS780 ||
p->rdev->family == CHIP_RS880)) {
/* TODO: is this still needed for NI+ ? */
p->relocs[i].prefered_domains =
RADEON_GEM_DOMAIN_VRAM;
@ -171,9 +157,22 @@ static int radeon_cs_parser_relocs(struct radeon_cs_parser *p)
domain |= RADEON_GEM_DOMAIN_GTT;
p->relocs[i].allowed_domains = domain;
}
/*
if (radeon_ttm_tt_has_userptr(p->relocs[i].robj->tbo.ttm)) {
uint32_t domain = p->relocs[i].prefered_domains;
if (!(domain & RADEON_GEM_DOMAIN_GTT)) {
DRM_ERROR("Only RADEON_GEM_DOMAIN_GTT is "
"allowed for userptr BOs\n");
return -EINVAL;
}
need_mmap_lock = true;
domain = RADEON_GEM_DOMAIN_GTT;
p->relocs[i].prefered_domains = domain;
p->relocs[i].allowed_domains = domain;
}
*/
p->relocs[i].tv.bo = &p->relocs[i].robj->tbo;
p->relocs[i].handle = r->handle;
p->relocs[i].tv.shared = !r->write_domain;
radeon_cs_buckets_add(&buckets, &p->relocs[i].tv.head,
priority);
@ -184,8 +183,15 @@ static int radeon_cs_parser_relocs(struct radeon_cs_parser *p)
if (p->cs_flags & RADEON_CS_USE_VM)
p->vm_bos = radeon_vm_get_bos(p->rdev, p->ib.vm,
&p->validated);
// if (need_mmap_lock)
// down_read(&current->mm->mmap_sem);
return radeon_bo_list_validate(p->rdev, &p->ticket, &p->validated, p->ring);
r = radeon_bo_list_validate(p->rdev, &p->ticket, &p->validated, p->ring);
// if (need_mmap_lock)
// up_read(&current->mm->mmap_sem);
return r;
}
static int radeon_cs_get_ring(struct radeon_cs_parser *p, u32 ring, s32 priority)
@ -231,17 +237,21 @@ static int radeon_cs_get_ring(struct radeon_cs_parser *p, u32 ring, s32 priority
return 0;
}
static void radeon_cs_sync_rings(struct radeon_cs_parser *p)
static int radeon_cs_sync_rings(struct radeon_cs_parser *p)
{
int i;
struct radeon_bo_list *reloc;
int r;
for (i = 0; i < p->nrelocs; i++) {
if (!p->relocs[i].robj)
continue;
list_for_each_entry(reloc, &p->validated, tv.head) {
struct reservation_object *resv;
radeon_semaphore_sync_to(p->ib.semaphore,
p->relocs[i].robj->tbo.sync_obj);
resv = reloc->robj->tbo.resv;
r = radeon_sync_resv(p->rdev, &p->ib.sync, resv,
reloc->tv.shared);
if (r)
return r;
}
return 0;
}
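radeon_cs_sync_rings() can fail now because a reservation object may carry fences emitted by another driver, which cannot be expressed as a GPU-side wait. A condensed sketch of how radeon_sync_resv() is expected to treat each fence it finds (details assumed; the shared-fence walk is omitted):
static int radeon_sync_one_fence(struct radeon_device *rdev,
				 struct radeon_sync *sync,
				 struct fence *f)
{
	struct radeon_fence *rfence = f ? to_radeon_fence(f) : NULL;

	if (!f)
		return 0;

	/* our own fence: queue it for a GPU-side wait later */
	if (rfence && rfence->rdev == rdev) {
		radeon_sync_fence(sync, rfence);
		return 0;
	}

	/* foreign fence: fall back to an interruptible CPU wait,
	 * which is where -ERESTARTSYS can bubble up from */
	return fence_wait(f, true);
}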
/* XXX: note that this is called from the legacy UMS CS ioctl as well */
@ -260,13 +270,11 @@ int radeon_cs_parser_init(struct radeon_cs_parser *p, void *data)
INIT_LIST_HEAD(&p->validated);
p->idx = 0;
p->ib.sa_bo = NULL;
p->ib.semaphore = NULL;
p->const_ib.sa_bo = NULL;
p->const_ib.semaphore = NULL;
p->chunk_ib_idx = -1;
p->chunk_relocs_idx = -1;
p->chunk_flags_idx = -1;
p->chunk_const_ib_idx = -1;
p->chunk_ib = NULL;
p->chunk_relocs = NULL;
p->chunk_flags = NULL;
p->chunk_const_ib = NULL;
p->chunks_array = kcalloc(cs->num_chunks, sizeof(uint64_t), GFP_KERNEL);
if (p->chunks_array == NULL) {
return -ENOMEM;
@ -293,24 +301,23 @@ int radeon_cs_parser_init(struct radeon_cs_parser *p, void *data)
return -EFAULT;
}
p->chunks[i].length_dw = user_chunk.length_dw;
p->chunks[i].chunk_id = user_chunk.chunk_id;
if (p->chunks[i].chunk_id == RADEON_CHUNK_ID_RELOCS) {
p->chunk_relocs_idx = i;
if (user_chunk.chunk_id == RADEON_CHUNK_ID_RELOCS) {
p->chunk_relocs = &p->chunks[i];
}
if (p->chunks[i].chunk_id == RADEON_CHUNK_ID_IB) {
p->chunk_ib_idx = i;
if (user_chunk.chunk_id == RADEON_CHUNK_ID_IB) {
p->chunk_ib = &p->chunks[i];
/* zero length IB isn't useful */
if (p->chunks[i].length_dw == 0)
return -EINVAL;
}
if (p->chunks[i].chunk_id == RADEON_CHUNK_ID_CONST_IB) {
p->chunk_const_ib_idx = i;
if (user_chunk.chunk_id == RADEON_CHUNK_ID_CONST_IB) {
p->chunk_const_ib = &p->chunks[i];
/* zero length CONST IB isn't useful */
if (p->chunks[i].length_dw == 0)
return -EINVAL;
}
if (p->chunks[i].chunk_id == RADEON_CHUNK_ID_FLAGS) {
p->chunk_flags_idx = i;
if (user_chunk.chunk_id == RADEON_CHUNK_ID_FLAGS) {
p->chunk_flags = &p->chunks[i];
/* zero length flags aren't useful */
if (p->chunks[i].length_dw == 0)
return -EINVAL;
@ -319,10 +326,10 @@ int radeon_cs_parser_init(struct radeon_cs_parser *p, void *data)
size = p->chunks[i].length_dw;
cdata = (void __user *)(unsigned long)user_chunk.chunk_data;
p->chunks[i].user_ptr = cdata;
if (p->chunks[i].chunk_id == RADEON_CHUNK_ID_CONST_IB)
if (user_chunk.chunk_id == RADEON_CHUNK_ID_CONST_IB)
continue;
if (p->chunks[i].chunk_id == RADEON_CHUNK_ID_IB) {
if (user_chunk.chunk_id == RADEON_CHUNK_ID_IB) {
if (!p->rdev || !(p->rdev->flags & RADEON_IS_AGP))
continue;
}
@ -335,7 +342,7 @@ int radeon_cs_parser_init(struct radeon_cs_parser *p, void *data)
if (copy_from_user(p->chunks[i].kdata, cdata, size)) {
return -EFAULT;
}
if (p->chunks[i].chunk_id == RADEON_CHUNK_ID_FLAGS) {
if (user_chunk.chunk_id == RADEON_CHUNK_ID_FLAGS) {
p->cs_flags = p->chunks[i].kdata[0];
if (p->chunks[i].length_dw > 1)
ring = p->chunks[i].kdata[1];
@ -376,8 +383,8 @@ int radeon_cs_parser_init(struct radeon_cs_parser *p, void *data)
static int cmp_size_smaller_first(void *priv, struct list_head *a,
struct list_head *b)
{
struct radeon_cs_reloc *la = list_entry(a, struct radeon_cs_reloc, tv.head);
struct radeon_cs_reloc *lb = list_entry(b, struct radeon_cs_reloc, tv.head);
struct radeon_bo_list *la = list_entry(a, struct radeon_bo_list, tv.head);
struct radeon_bo_list *lb = list_entry(b, struct radeon_bo_list, tv.head);
/* Sort A before B if A is smaller. */
return (int)la->robj->tbo.num_pages - (int)lb->robj->tbo.num_pages;
@ -410,7 +417,7 @@ static void radeon_cs_parser_fini(struct radeon_cs_parser *parser, int error, bo
ttm_eu_fence_buffer_objects(&parser->ticket,
&parser->validated,
parser->ib.fence);
&parser->ib.fence->base);
} else if (backoff) {
ttm_eu_backoff_reservation(&parser->ticket,
&parser->validated);
@ -418,14 +425,16 @@ static void radeon_cs_parser_fini(struct radeon_cs_parser *parser, int error, bo
if (parser->relocs != NULL) {
for (i = 0; i < parser->nrelocs; i++) {
if (parser->relocs[i].gobj)
drm_gem_object_unreference_unlocked(parser->relocs[i].gobj);
struct radeon_bo *bo = parser->relocs[i].robj;
if (bo == NULL)
continue;
drm_gem_object_unreference_unlocked(&bo->gem_base);
}
}
kfree(parser->track);
kfree(parser->relocs);
kfree(parser->relocs_ptr);
kfree(parser->vm_bos);
drm_free_large(parser->vm_bos);
for (i = 0; i < parser->nchunks; i++)
drm_free_large(parser->chunks[i].kdata);
kfree(parser->chunks);
@ -439,7 +448,7 @@ static int radeon_cs_ib_chunk(struct radeon_device *rdev,
{
int r;
if (parser->chunk_ib_idx == -1)
if (parser->chunk_ib == NULL)
return 0;
if (parser->cs_flags & RADEON_CS_USE_VM)
@ -451,13 +460,19 @@ static int radeon_cs_ib_chunk(struct radeon_device *rdev,
return r;
}
r = radeon_cs_sync_rings(parser);
if (r) {
if (r != -ERESTARTSYS)
DRM_ERROR("Failed to sync rings: %i\n", r);
return r;
}
if (parser->ring == R600_RING_TYPE_UVD_INDEX)
radeon_uvd_note_usage(rdev);
else if ((parser->ring == TN_RING_TYPE_VCE1_INDEX) ||
(parser->ring == TN_RING_TYPE_VCE2_INDEX))
radeon_vce_note_usage(rdev);
radeon_cs_sync_rings(parser);
r = radeon_ib_schedule(rdev, &parser->ib, NULL, true);
if (r) {
DRM_ERROR("Failed to schedule IB !\n");
@ -493,10 +508,6 @@ static int radeon_bo_vm_update_pte(struct radeon_cs_parser *p,
for (i = 0; i < p->nrelocs; i++) {
struct radeon_bo *bo;
/* ignore duplicates */
if (p->relocs_ptr[i] != &p->relocs[i])
continue;
bo = p->relocs[i].robj;
bo_va = radeon_vm_bo_find(vm, bo);
if (bo_va == NULL) {
@ -507,6 +518,8 @@ static int radeon_bo_vm_update_pte(struct radeon_cs_parser *p,
r = radeon_vm_bo_update(rdev, bo_va, &bo->tbo.mem);
if (r)
return r;
radeon_sync_fence(&p->ib.sync, bo_va->last_pt_update);
}
return radeon_vm_clear_invalids(rdev, vm);
@ -519,7 +532,7 @@ static int radeon_cs_ib_vm_chunk(struct radeon_device *rdev,
struct radeon_vm *vm = &fpriv->vm;
int r;
if (parser->chunk_ib_idx == -1)
if (parser->chunk_ib == NULL)
return 0;
if ((parser->cs_flags & RADEON_CS_USE_VM) == 0)
return 0;
@ -544,11 +557,16 @@ static int radeon_cs_ib_vm_chunk(struct radeon_device *rdev,
if (r) {
goto out;
}
radeon_cs_sync_rings(parser);
radeon_semaphore_sync_to(parser->ib.semaphore, vm->fence);
r = radeon_cs_sync_rings(parser);
if (r) {
if (r != -ERESTARTSYS)
DRM_ERROR("Failed to sync rings: %i\n", r);
goto out;
}
if ((rdev->family >= CHIP_TAHITI) &&
(parser->chunk_const_ib_idx != -1)) {
(parser->chunk_const_ib != NULL)) {
r = radeon_ib_schedule(rdev, &parser->ib, &parser->const_ib, true);
} else {
r = radeon_ib_schedule(rdev, &parser->ib, NULL, true);
@ -575,7 +593,7 @@ static int radeon_cs_ib_fill(struct radeon_device *rdev, struct radeon_cs_parser
struct radeon_vm *vm = NULL;
int r;
if (parser->chunk_ib_idx == -1)
if (parser->chunk_ib == NULL)
return 0;
if (parser->cs_flags & RADEON_CS_USE_VM) {
@ -583,8 +601,8 @@ static int radeon_cs_ib_fill(struct radeon_device *rdev, struct radeon_cs_parser
vm = &fpriv->vm;
if ((rdev->family >= CHIP_TAHITI) &&
(parser->chunk_const_ib_idx != -1)) {
ib_chunk = &parser->chunks[parser->chunk_const_ib_idx];
(parser->chunk_const_ib != NULL)) {
ib_chunk = parser->chunk_const_ib;
if (ib_chunk->length_dw > RADEON_IB_VM_MAX_SIZE) {
DRM_ERROR("cs IB CONST too big: %d\n", ib_chunk->length_dw);
return -EINVAL;
@ -603,13 +621,13 @@ static int radeon_cs_ib_fill(struct radeon_device *rdev, struct radeon_cs_parser
return -EFAULT;
}
ib_chunk = &parser->chunks[parser->chunk_ib_idx];
ib_chunk = parser->chunk_ib;
if (ib_chunk->length_dw > RADEON_IB_VM_MAX_SIZE) {
DRM_ERROR("cs IB too big: %d\n", ib_chunk->length_dw);
return -EINVAL;
}
}
ib_chunk = &parser->chunks[parser->chunk_ib_idx];
ib_chunk = parser->chunk_ib;
r = radeon_ib_get(rdev, parser->ring, &parser->ib,
vm, ib_chunk->length_dw * 4);
@ -694,7 +712,7 @@ int radeon_cs_packet_parse(struct radeon_cs_parser *p,
struct radeon_cs_packet *pkt,
unsigned idx)
{
struct radeon_cs_chunk *ib_chunk = &p->chunks[p->chunk_ib_idx];
struct radeon_cs_chunk *ib_chunk = p->chunk_ib;
struct radeon_device *rdev = p->rdev;
uint32_t header;
@ -788,7 +806,7 @@ void radeon_cs_dump_packet(struct radeon_cs_parser *p,
* GPU offset using the provided start.
**/
int radeon_cs_packet_next_reloc(struct radeon_cs_parser *p,
struct radeon_cs_reloc **cs_reloc,
struct radeon_bo_list **cs_reloc,
int nomm)
{
struct radeon_cs_chunk *relocs_chunk;
@ -796,12 +814,12 @@ int radeon_cs_packet_next_reloc(struct radeon_cs_parser *p,
unsigned idx;
int r;
if (p->chunk_relocs_idx == -1) {
if (p->chunk_relocs == NULL) {
DRM_ERROR("No relocation chunk !\n");
return -EINVAL;
}
*cs_reloc = NULL;
relocs_chunk = &p->chunks[p->chunk_relocs_idx];
relocs_chunk = p->chunk_relocs;
r = radeon_cs_packet_parse(p, &p3reloc, p->idx);
if (r)
return r;
@ -827,6 +845,6 @@ int radeon_cs_packet_next_reloc(struct radeon_cs_parser *p,
(u64)relocs_chunk->kdata[idx + 3] << 32;
(*cs_reloc)->gpu_offset |= relocs_chunk->kdata[idx + 0];
} else
*cs_reloc = p->relocs_ptr[(idx / 4)];
*cs_reloc = &p->relocs[(idx / 4)];
return 0;
}

View File

@ -71,7 +71,7 @@ int radeon_deep_color = 0;
int radeon_use_pflipirq = 2;
int irq_override = 0;
int radeon_bapm = -1;
int radeon_backlight = 0;
extern display_t *os_display;
extern struct drm_device *main_device;
@ -82,7 +82,7 @@ void parse_cmdline(char *cmdline, videomode_t *mode, char *log, int *kms);
int init_display(struct radeon_device *rdev, videomode_t *mode);
int init_display_kms(struct drm_device *dev, videomode_t *usermode);
int get_modes(videomode_t *mode, u32_t *count);
int get_modes(videomode_t *mode, u32 *count);
int set_user_mode(videomode_t *mode);
int r100_2D_test(struct radeon_device *rdev);
@ -437,6 +437,37 @@ void radeon_doorbell_free(struct radeon_device *rdev, u32 doorbell)
__clear_bit(doorbell, rdev->doorbell.used);
}
/**
* radeon_doorbell_get_kfd_info - Report doorbell configuration required to
* setup KFD
*
* @rdev: radeon_device pointer
* @aperture_base: output returning doorbell aperture base physical address
* @aperture_size: output returning doorbell aperture size in bytes
* @start_offset: output returning # of doorbell bytes reserved for radeon.
*
* Radeon and the KFD share the doorbell aperture. Radeon sets it up,
* takes doorbells required for its own rings and reports the setup to KFD.
* Radeon reserved doorbells are at the start of the doorbell aperture.
*/
void radeon_doorbell_get_kfd_info(struct radeon_device *rdev,
phys_addr_t *aperture_base,
size_t *aperture_size,
size_t *start_offset)
{
/* The first num_doorbells are used by radeon.
* KFD takes whatever's left in the aperture. */
if (rdev->doorbell.size > rdev->doorbell.num_doorbells * sizeof(u32)) {
*aperture_base = rdev->doorbell.base;
*aperture_size = rdev->doorbell.size;
*start_offset = rdev->doorbell.num_doorbells * sizeof(u32);
} else {
*aperture_base = 0;
*aperture_size = 0;
*start_offset = 0;
}
}
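A consumer-side sketch of this contract (hypothetical caller; amdkfd is the intended user and the helper name is made up):
static int kfd_claim_doorbells(struct radeon_device *rdev)
{
	phys_addr_t base;
	size_t size, reserved;

	radeon_doorbell_get_kfd_info(rdev, &base, &size, &reserved);
	if (!size)
		return -ENOSPC;	/* radeon consumed the whole aperture */

	/* KFD would map [base + reserved, base + size) for its queues */
	return 0;
}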
/*
* radeon_wb_*()
* Writeback is the method by which the GPU updates special pages
@ -494,7 +525,7 @@ int radeon_wb_init(struct radeon_device *rdev)
if (rdev->wb.wb_obj == NULL) {
r = radeon_bo_create(rdev, RADEON_GPU_PAGE_SIZE, PAGE_SIZE, true,
RADEON_GEM_DOMAIN_GTT, 0, NULL,
RADEON_GEM_DOMAIN_GTT, 0, NULL, NULL,
&rdev->wb.wb_obj);
if (r) {
dev_warn(rdev->dev, "(%d) create WB bo failed\n", r);
@ -998,6 +1029,7 @@ int radeon_atombios_init(struct radeon_device *rdev)
}
mutex_init(&rdev->mode_info.atom_context->mutex);
mutex_init(&rdev->mode_info.atom_context->scratch_mutex);
radeon_atom_initialize_bios_scratch_regs(rdev->ddev);
atom_allocate_fb_scratch(rdev->mode_info.atom_context);
return 0;
@ -1234,6 +1266,7 @@ int radeon_device_init(struct radeon_device *rdev,
for (i = 0; i < RADEON_NUM_RINGS; i++) {
rdev->ring[i].idx = i;
}
rdev->fence_context = fence_context_alloc(RADEON_NUM_RINGS);
DRM_INFO("initializing kernel modesetting (%s 0x%04X:0x%04X 0x%04X:0x%04X).\n",
radeon_family_name[rdev->family], pdev->vendor, pdev->device,
@ -1248,9 +1281,13 @@ int radeon_device_init(struct radeon_device *rdev,
mutex_init(&rdev->pm.mutex);
mutex_init(&rdev->gpu_clock_mutex);
mutex_init(&rdev->srbm_mutex);
mutex_init(&rdev->grbm_idx_mutex);
// init_rwsem(&rdev->pm.mclk_lock);
// init_rwsem(&rdev->exclusive_lock);
init_waitqueue_head(&rdev->irq.vblank_queue);
mutex_init(&rdev->mn_lock);
// hash_init(rdev->mn_hash);
r = radeon_gem_init(rdev);
if (r)
return r;
@ -1362,9 +1399,6 @@ int radeon_device_init(struct radeon_device *rdev,
if (r)
return r;
r = radeon_ib_ring_tests(rdev);
if (r)
DRM_ERROR("ib ring test failed (%d).\n", r);
if (rdev->flags & RADEON_IS_AGP && !rdev->accel_working) {
@ -1379,6 +1413,10 @@ int radeon_device_init(struct radeon_device *rdev,
return r;
}
// r = radeon_ib_ring_tests(rdev);
// if (r)
// DRM_ERROR("ib ring test failed (%d).\n", r);
if ((radeon_testing & 1)) {
if (rdev->accel_working)
radeon_test_moves(rdev);
@ -1436,7 +1474,6 @@ int radeon_gpu_reset(struct radeon_device *rdev)
}
}
retry:
r = radeon_asic_reset(rdev);
if (!r) {
dev_info(rdev->dev, "GPU reset succeeded, trying to resume\n");
@ -1445,25 +1482,12 @@ retry:
radeon_restore_bios_scratch_regs(rdev);
if (!r) {
for (i = 0; i < RADEON_NUM_RINGS; ++i) {
if (!r && ring_data[i]) {
radeon_ring_restore(rdev, &rdev->ring[i],
ring_sizes[i], ring_data[i]);
ring_sizes[i] = 0;
ring_data[i] = NULL;
}
// r = radeon_ib_ring_tests(rdev);
// if (r) {
// dev_err(rdev->dev, "ib ring test failed (%d).\n", r);
// if (saved) {
// saved = false;
// radeon_suspend(rdev);
// goto retry;
// }
// }
} else {
for (i = 0; i < RADEON_NUM_RINGS; ++i) {
radeon_fence_driver_force_completion(rdev, i);
kfree(ring_data[i]);
}
}

View File

@ -1537,7 +1537,7 @@ int radeon_get_crtc_scanoutpos(struct drm_device *dev, int crtc, unsigned int fl
/* In vblank? */
if (in_vbl)
ret |= DRM_SCANOUTPOS_INVBL;
ret |= DRM_SCANOUTPOS_IN_VBLANK;
/* Is vpos outside nominal vblank area, but less than
* 1/100 of a frame height away from start of vblank?

View File

@ -179,6 +179,9 @@ static void radeon_encoder_add_backlight(struct radeon_encoder *radeon_encoder,
(rdev->pdev->subsystem_vendor == 0x1734) &&
(rdev->pdev->subsystem_device == 0x1107))
use_bl = false;
/* disable native backlight control on older asics */
else if (rdev->family < CHIP_R600)
use_bl = false;
else
use_bl = true;
}
@ -410,3 +413,24 @@ bool radeon_dig_monitor_is_duallink(struct drm_encoder *encoder,
}
}
bool radeon_encoder_is_digital(struct drm_encoder *encoder)
{
struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
switch (radeon_encoder->encoder_id) {
case ENCODER_OBJECT_ID_INTERNAL_LVDS:
case ENCODER_OBJECT_ID_INTERNAL_TMDS1:
case ENCODER_OBJECT_ID_INTERNAL_KLDSCP_TMDS1:
case ENCODER_OBJECT_ID_INTERNAL_LVTM1:
case ENCODER_OBJECT_ID_INTERNAL_DVO1:
case ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DVO1:
case ENCODER_OBJECT_ID_INTERNAL_DDI:
case ENCODER_OBJECT_ID_INTERNAL_UNIPHY:
case ENCODER_OBJECT_ID_INTERNAL_KLDSCP_LVTMA:
case ENCODER_OBJECT_ID_INTERNAL_UNIPHY1:
case ENCODER_OBJECT_ID_INTERNAL_UNIPHY2:
case ENCODER_OBJECT_ID_INTERNAL_UNIPHY3:
return true;
default:
return false;
}
}

View File

@ -184,7 +184,8 @@ out_unref:
static int radeonfb_create(struct drm_fb_helper *helper,
struct drm_fb_helper_surface_size *sizes)
{
struct radeon_fbdev *rfbdev = (struct radeon_fbdev *)helper;
struct radeon_fbdev *rfbdev =
container_of(helper, struct radeon_fbdev, helper);
struct radeon_device *rdev = rfbdev->rdev;
struct fb_info *info;
struct drm_framebuffer *fb = NULL;

View File

@ -29,9 +29,8 @@
* Dave Airlie
*/
#include <linux/seq_file.h>
#include <asm/atomic.h>
#include <linux/atomic.h>
#include <linux/wait.h>
#include <linux/list.h>
#include <linux/kref.h>
#include <linux/slab.h>
#include <drm/drmP.h>
@ -111,30 +110,70 @@ int radeon_fence_emit(struct radeon_device *rdev,
struct radeon_fence **fence,
int ring)
{
u64 seq = ++rdev->fence_drv[ring].sync_seq[ring];
/* we are protected by the ring emission mutex */
*fence = kmalloc(sizeof(struct radeon_fence), GFP_KERNEL);
if ((*fence) == NULL) {
return -ENOMEM;
}
kref_init(&((*fence)->kref));
(*fence)->rdev = rdev;
(*fence)->seq = ++rdev->fence_drv[ring].sync_seq[ring];
(*fence)->seq = seq;
(*fence)->ring = ring;
(*fence)->is_vm_update = false;
fence_init(&(*fence)->base, &radeon_fence_ops,
&rdev->fence_queue.lock, rdev->fence_context + ring, seq);
radeon_fence_ring_emit(rdev, ring, *fence);
trace_radeon_fence_emit(rdev->ddev, ring, (*fence)->seq);
return 0;
}
/**
* radeon_fence_process - process a fence
* radeon_fence_check_signaled - callback from fence_queue
*
* This function is called with the fence_queue lock held, which is also
* used for the fence locking itself, so the unlocked variants are used
* for fence_signal and remove_wait_queue.
*/
static int radeon_fence_check_signaled(wait_queue_t *wait, unsigned mode, int flags, void *key)
{
struct radeon_fence *fence;
u64 seq;
fence = container_of(wait, struct radeon_fence, fence_wake);
/*
* We cannot use radeon_fence_process here because we're already
* in the waitqueue, in a call from wake_up_all.
*/
seq = atomic64_read(&fence->rdev->fence_drv[fence->ring].last_seq);
if (seq >= fence->seq) {
int ret = fence_signal_locked(&fence->base);
if (!ret)
FENCE_TRACE(&fence->base, "signaled from irq context\n");
else
FENCE_TRACE(&fence->base, "was already signaled\n");
radeon_irq_kms_sw_irq_put(fence->rdev, fence->ring);
// __remove_wait_queue(&fence->rdev->fence_queue, &fence->fence_wake);
fence_put(&fence->base);
} else
FENCE_TRACE(&fence->base, "pending\n");
return 0;
}
/**
* radeon_fence_activity - check for fence activity
*
* @rdev: radeon_device pointer
* @ring: ring index the fence is associated with
*
* Checks the current fence value and wakes the fence queue
* if the sequence number has increased (all asics).
* Checks the current fence value and calculates the last
* signaled fence value. Returns true if activity occurred
* on the ring and the fence_queue should be woken up.
*/
void radeon_fence_process(struct radeon_device *rdev, int ring)
static bool radeon_fence_activity(struct radeon_device *rdev, int ring)
{
uint64_t seq, last_seq, last_emitted;
unsigned count_loop = 0;
@ -190,23 +229,77 @@ void radeon_fence_process(struct radeon_device *rdev, int ring)
}
} while (atomic64_xchg(&rdev->fence_drv[ring].last_seq, seq) > seq);
if (wake)
wake_up_all(&rdev->fence_queue);
// if (seq < last_emitted)
// radeon_fence_schedule_check(rdev, ring);
return wake;
}
/**
* radeon_fence_destroy - destroy a fence
* radeon_fence_check_lockup - check for hardware lockup
*
* @kref: fence kref
* @work: delayed work item
*
* Frees the fence object (all asics).
* Checks for fence activity and, if there is none, probes
* the hardware to see whether a lockup occurred.
*/
static void radeon_fence_destroy(struct kref *kref)
static void radeon_fence_check_lockup(struct work_struct *work)
{
struct radeon_fence *fence;
struct radeon_fence_driver *fence_drv;
struct radeon_device *rdev;
int ring;
fence = container_of(kref, struct radeon_fence, kref);
kfree(fence);
fence_drv = container_of(work, struct radeon_fence_driver,
lockup_work.work);
rdev = fence_drv->rdev;
ring = fence_drv - &rdev->fence_drv[0];
// if (!down_read_trylock(&rdev->exclusive_lock)) {
// /* just reschedule the check if a reset is going on */
// radeon_fence_schedule_check(rdev, ring);
// return;
// }
if (fence_drv->delayed_irq && rdev->ddev->irq_enabled) {
unsigned long irqflags;
fence_drv->delayed_irq = false;
spin_lock_irqsave(&rdev->irq.lock, irqflags);
radeon_irq_set(rdev);
spin_unlock_irqrestore(&rdev->irq.lock, irqflags);
}
if (radeon_fence_activity(rdev, ring))
wake_up_all(&rdev->fence_queue);
else if (radeon_ring_is_lockup(rdev, ring, &rdev->ring[ring])) {
/* good news, we believe it's a lockup */
dev_warn(rdev->dev, "GPU lockup (current fence id "
"0x%016llx last fence id 0x%016llx on ring %d)\n",
(uint64_t)atomic64_read(&fence_drv->last_seq),
fence_drv->sync_seq[ring], ring);
/* remember that we need a reset */
rdev->needs_reset = true;
wake_up_all(&rdev->fence_queue);
}
// up_read(&rdev->exclusive_lock);
}
/**
* radeon_fence_process - process a fence
*
* @rdev: radeon_device pointer
* @ring: ring index the fence is associated with
*
* Checks the current fence value and wakes the fence queue
* if the sequence number has increased (all asics).
*/
void radeon_fence_process(struct radeon_device *rdev, int ring)
{
if (radeon_fence_activity(rdev, ring))
wake_up_all(&rdev->fence_queue);
}
/**
@ -237,6 +330,77 @@ static bool radeon_fence_seq_signaled(struct radeon_device *rdev,
return false;
}
static bool radeon_fence_is_signaled(struct fence *f)
{
struct radeon_fence *fence = to_radeon_fence(f);
struct radeon_device *rdev = fence->rdev;
unsigned ring = fence->ring;
u64 seq = fence->seq;
if (atomic64_read(&rdev->fence_drv[ring].last_seq) >= seq) {
return true;
}
// if (down_read_trylock(&rdev->exclusive_lock))
{
radeon_fence_process(rdev, ring);
// up_read(&rdev->exclusive_lock);
if (atomic64_read(&rdev->fence_drv[ring].last_seq) >= seq) {
return true;
}
}
return false;
}
/**
* radeon_fence_enable_signaling - enable signaling on fence
* @fence: fence
*
* This function is called with fence_queue lock held, and adds a callback
* to fence_queue that checks if this fence is signaled, and if so it
* signals the fence and removes itself.
*/
static bool radeon_fence_enable_signaling(struct fence *f)
{
struct radeon_fence *fence = to_radeon_fence(f);
struct radeon_device *rdev = fence->rdev;
if (atomic64_read(&rdev->fence_drv[fence->ring].last_seq) >= fence->seq)
return false;
// if (down_read_trylock(&rdev->exclusive_lock))
{
radeon_irq_kms_sw_irq_get(rdev, fence->ring);
// if (radeon_fence_activity(rdev, fence->ring))
// wake_up_all_locked(&rdev->fence_queue);
/* did fence get signaled after we enabled the sw irq? */
if (atomic64_read(&rdev->fence_drv[fence->ring].last_seq) >= fence->seq) {
radeon_irq_kms_sw_irq_put(rdev, fence->ring);
// up_read(&rdev->exclusive_lock);
return false;
}
// up_read(&rdev->exclusive_lock);
// } else {
/* we're probably in a lockup, let's not fiddle too much */
// if (radeon_irq_kms_sw_irq_get_delayed(rdev, fence->ring))
// rdev->fence_drv[fence->ring].delayed_irq = true;
// radeon_fence_schedule_check(rdev, fence->ring);
}
// fence->fence_wake.flags = 0;
// fence->fence_wake.private = NULL;
fence->fence_wake.func = radeon_fence_check_signaled;
__add_wait_queue(&rdev->fence_queue, &fence->fence_wake);
fence_get(f);
FENCE_TRACE(&fence->base, "armed on ring %i!\n", fence->ring);
return true;
}
/**
* radeon_fence_signaled - check if a fence has signaled
*
@ -247,14 +411,15 @@ static bool radeon_fence_seq_signaled(struct radeon_device *rdev,
*/
bool radeon_fence_signaled(struct radeon_fence *fence)
{
if (!fence) {
if (!fence)
return true;
}
if (fence->seq == RADEON_FENCE_SIGNALED_SEQ) {
return true;
}
if (radeon_fence_seq_signaled(fence->rdev, fence->seq, fence->ring)) {
fence->seq = RADEON_FENCE_SIGNALED_SEQ;
int ret;
ret = fence_signal(&fence->base);
if (!ret)
FENCE_TRACE(&fence->base, "signaled from radeon_fence_signaled\n");
return true;
}
return false;
@ -283,49 +448,54 @@ static bool radeon_fence_any_seq_signaled(struct radeon_device *rdev, u64 *seq)
}
/**
* radeon_fence_wait_seq - wait for specific sequence numbers
* radeon_fence_wait_seq_timeout - wait for specific sequence numbers
*
* @rdev: radeon device pointer
* @target_seq: sequence number(s) we want to wait for
* @intr: use interruptible sleep
* @timeout: maximum time to wait, or MAX_SCHEDULE_TIMEOUT for infinite wait
*
* Wait for the requested sequence number(s) to be written by any ring
* (all asics). Sequence number array is indexed by ring id.
* @intr selects whether to use interruptible (true) or non-interruptible
* (false) sleep when waiting for the sequence number. Helper function
* for radeon_fence_wait_*().
* Returns 0 if the sequence number has passed, error for all other cases.
* Returns remaining time if the sequence number has passed, 0 when
* the wait timed out, or an error for all other cases.
* -EDEADLK is returned when a GPU lockup has been detected.
*/
static int radeon_fence_wait_seq(struct radeon_device *rdev, u64 *target_seq,
bool intr)
static long radeon_fence_wait_seq_timeout(struct radeon_device *rdev,
u64 *target_seq, bool intr,
long timeout)
{
uint64_t last_seq[RADEON_NUM_RINGS];
bool signaled;
int i, r;
long r;
int i;
while (!radeon_fence_any_seq_signaled(rdev, target_seq)) {
if (radeon_fence_any_seq_signaled(rdev, target_seq))
return timeout;
/* Save current sequence values, used to check for GPU lockups */
/* enable IRQs and tracing */
for (i = 0; i < RADEON_NUM_RINGS; ++i) {
if (!target_seq[i])
continue;
last_seq[i] = atomic64_read(&rdev->fence_drv[i].last_seq);
trace_radeon_fence_wait_begin(rdev->ddev, i, target_seq[i]);
radeon_irq_kms_sw_irq_get(rdev, i);
}
if (intr) {
r = wait_event_interruptible_timeout(rdev->fence_queue, (
(signaled = radeon_fence_any_seq_signaled(rdev, target_seq))
|| rdev->needs_reset), RADEON_FENCE_JIFFIES_TIMEOUT);
radeon_fence_any_seq_signaled(rdev, target_seq)
|| rdev->needs_reset), timeout);
} else {
r = wait_event_timeout(rdev->fence_queue, (
(signaled = radeon_fence_any_seq_signaled(rdev, target_seq))
|| rdev->needs_reset), RADEON_FENCE_JIFFIES_TIMEOUT);
radeon_fence_any_seq_signaled(rdev, target_seq)
|| rdev->needs_reset), timeout);
}
if (rdev->needs_reset)
r = -EDEADLK;
for (i = 0; i < RADEON_NUM_RINGS; ++i) {
if (!target_seq[i])
continue;
@ -334,59 +504,14 @@ static int radeon_fence_wait_seq(struct radeon_device *rdev, u64 *target_seq,
trace_radeon_fence_wait_end(rdev->ddev, i, target_seq[i]);
}
if (unlikely(r < 0))
return r;
if (unlikely(!signaled)) {
if (rdev->needs_reset)
return -EDEADLK;
/* we were interrupted for some reason and fence
* isn't signaled yet, resume waiting */
if (r)
continue;
for (i = 0; i < RADEON_NUM_RINGS; ++i) {
if (!target_seq[i])
continue;
if (last_seq[i] != atomic64_read(&rdev->fence_drv[i].last_seq))
break;
}
if (i != RADEON_NUM_RINGS)
continue;
for (i = 0; i < RADEON_NUM_RINGS; ++i) {
if (!target_seq[i])
continue;
if (radeon_ring_is_lockup(rdev, i, &rdev->ring[i]))
break;
}
if (i < RADEON_NUM_RINGS) {
/* good news we believe it's a lockup */
dev_warn(rdev->dev, "GPU lockup (waiting for "
"0x%016llx last fence id 0x%016llx on"
" ring %d)\n",
target_seq[i], last_seq[i], i);
/* remember that we need a reset */
rdev->needs_reset = true;
wake_up_all(&rdev->fence_queue);
return -EDEADLK;
}
}
}
return 0;
}
/**
* radeon_fence_wait - wait for a fence to signal
*
* @fence: radeon fence object
* @intr: use interruptable sleep
* @intr: use interruptible sleep
*
* Wait for the requested fence to signal (all asics).
* @intr selects whether to use interruptible (true) or non-interruptible
@ -396,22 +521,26 @@ static int radeon_fence_wait_seq(struct radeon_device *rdev, u64 *target_seq,
int radeon_fence_wait(struct radeon_fence *fence, bool intr)
{
uint64_t seq[RADEON_NUM_RINGS] = {};
int r;
long r;
if (fence == NULL) {
WARN(1, "Querying an invalid fence : %p !\n", fence);
return -EINVAL;
}
/*
* This function should not be called on !radeon fences.
* If it were, it could also be called on radeon fences
* belonging to another card, and exclusive_lock is not
* held in that case.
*/
if (WARN_ON_ONCE(!to_radeon_fence(&fence->base)))
return fence_wait(&fence->base, intr);
seq[fence->ring] = fence->seq;
if (seq[fence->ring] == RADEON_FENCE_SIGNALED_SEQ)
return 0;
r = radeon_fence_wait_seq(fence->rdev, seq, intr);
if (r)
r = radeon_fence_wait_seq_timeout(fence->rdev, seq, intr, MAX_SCHEDULE_TIMEOUT);
if (r < 0) {
return r;
}
fence->seq = RADEON_FENCE_SIGNALED_SEQ;
r = fence_signal(&fence->base);
if (!r)
FENCE_TRACE(&fence->base, "signaled from fence_wait\n");
return 0;
}
@ -434,7 +563,7 @@ int radeon_fence_wait_any(struct radeon_device *rdev,
{
uint64_t seq[RADEON_NUM_RINGS];
unsigned i, num_rings = 0;
int r;
long r;
for (i = 0; i < RADEON_NUM_RINGS; ++i) {
seq[i] = 0;
@ -445,18 +574,14 @@ int radeon_fence_wait_any(struct radeon_device *rdev,
seq[i] = fences[i]->seq;
++num_rings;
/* test if something was already signaled */
if (seq[i] == RADEON_FENCE_SIGNALED_SEQ)
return 0;
}
/* nothing to wait for ? */
if (num_rings == 0)
return -ENOENT;
r = radeon_fence_wait_seq(rdev, seq, intr);
if (r) {
r = radeon_fence_wait_seq_timeout(rdev, seq, intr, MAX_SCHEDULE_TIMEOUT);
if (r < 0) {
return r;
}
return 0;
@ -475,6 +600,7 @@ int radeon_fence_wait_any(struct radeon_device *rdev,
int radeon_fence_wait_next(struct radeon_device *rdev, int ring)
{
uint64_t seq[RADEON_NUM_RINGS] = {};
long r;
seq[ring] = atomic64_read(&rdev->fence_drv[ring].last_seq) + 1ULL;
if (seq[ring] >= rdev->fence_drv[ring].sync_seq[ring]) {
@ -482,7 +608,10 @@ int radeon_fence_wait_next(struct radeon_device *rdev, int ring)
already the last emitted fence */
return -ENOENT;
}
return radeon_fence_wait_seq(rdev, seq, false);
r = radeon_fence_wait_seq_timeout(rdev, seq, false, MAX_SCHEDULE_TIMEOUT);
if (r < 0)
return r;
return 0;
}
/**
@ -498,18 +627,18 @@ int radeon_fence_wait_next(struct radeon_device *rdev, int ring)
int radeon_fence_wait_empty(struct radeon_device *rdev, int ring)
{
uint64_t seq[RADEON_NUM_RINGS] = {};
int r;
long r;
seq[ring] = rdev->fence_drv[ring].sync_seq[ring];
if (!seq[ring])
return 0;
r = radeon_fence_wait_seq(rdev, seq, false);
if (r) {
r = radeon_fence_wait_seq_timeout(rdev, seq, false, MAX_SCHEDULE_TIMEOUT);
if (r < 0) {
if (r == -EDEADLK)
return -EDEADLK;
dev_err(rdev->dev, "error waiting for ring[%d] to become idle (%d)\n",
dev_err(rdev->dev, "error waiting for ring[%d] to become idle (%ld)\n",
ring, r);
}
return 0;
@ -525,7 +654,7 @@ int radeon_fence_wait_empty(struct radeon_device *rdev, int ring)
*/
struct radeon_fence *radeon_fence_ref(struct radeon_fence *fence)
{
kref_get(&fence->kref);
fence_get(&fence->base);
return fence;
}
@ -542,7 +671,7 @@ void radeon_fence_unref(struct radeon_fence **fence)
*fence = NULL;
if (tmp) {
kref_put(&tmp->kref, radeon_fence_destroy);
fence_put(&tmp->base);
}
}
@ -711,6 +840,9 @@ static void radeon_fence_driver_init_ring(struct radeon_device *rdev, int ring)
rdev->fence_drv[ring].sync_seq[i] = 0;
atomic64_set(&rdev->fence_drv[ring].last_seq, 0);
rdev->fence_drv[ring].initialized = false;
INIT_DELAYED_WORK(&rdev->fence_drv[ring].lockup_work,
radeon_fence_check_lockup);
rdev->fence_drv[ring].rdev = rdev;
}
/**
@ -758,7 +890,7 @@ void radeon_fence_driver_fini(struct radeon_device *rdev)
r = radeon_fence_wait_empty(rdev, ring);
if (r) {
/* no need to trigger GPU reset as we are unloading */
radeon_fence_driver_force_completion(rdev, ring);
}
wake_up_all(&rdev->fence_queue);
radeon_scratch_free(rdev, rdev->fence_drv[ring].scratch_reg);
@ -771,17 +903,14 @@ void radeon_fence_driver_fini(struct radeon_device *rdev)
* radeon_fence_driver_force_completion - force all fence waiters to complete
*
* @rdev: radeon device pointer
* @ring: the ring to complete
*
* In case of GPU reset failure make sure no process keeps waiting on a fence
* that will never complete.
*/
void radeon_fence_driver_force_completion(struct radeon_device *rdev, int ring)
{
if (rdev->fence_drv[ring].initialized) {
radeon_fence_write(rdev, rdev->fence_drv[ring].sync_seq[ring], ring);
}
}
@ -833,6 +962,7 @@ static int radeon_debugfs_gpu_reset(struct seq_file *m, void *data)
down_read(&rdev->exclusive_lock);
seq_printf(m, "%d\n", rdev->needs_reset);
rdev->needs_reset = true;
wake_up_all(&rdev->fence_queue);
up_read(&rdev->exclusive_lock);
return 0;
@ -852,3 +982,72 @@ int radeon_debugfs_fence_init(struct radeon_device *rdev)
return 0;
#endif
}
static const char *radeon_fence_get_driver_name(struct fence *fence)
{
return "radeon";
}
static const char *radeon_fence_get_timeline_name(struct fence *f)
{
struct radeon_fence *fence = to_radeon_fence(f);
switch (fence->ring) {
case RADEON_RING_TYPE_GFX_INDEX: return "radeon.gfx";
case CAYMAN_RING_TYPE_CP1_INDEX: return "radeon.cp1";
case CAYMAN_RING_TYPE_CP2_INDEX: return "radeon.cp2";
case R600_RING_TYPE_DMA_INDEX: return "radeon.dma";
case CAYMAN_RING_TYPE_DMA1_INDEX: return "radeon.dma1";
case R600_RING_TYPE_UVD_INDEX: return "radeon.uvd";
case TN_RING_TYPE_VCE1_INDEX: return "radeon.vce1";
case TN_RING_TYPE_VCE2_INDEX: return "radeon.vce2";
default: WARN_ON_ONCE(1); return "radeon.unk";
}
}
static inline bool radeon_test_signaled(struct radeon_fence *fence)
{
return test_bit(FENCE_FLAG_SIGNALED_BIT, &fence->base.flags);
}
static signed long radeon_fence_default_wait(struct fence *f, bool intr,
signed long t)
{
struct radeon_fence *fence = to_radeon_fence(f);
struct radeon_device *rdev = fence->rdev;
bool signaled;
fence_enable_sw_signaling(&fence->base);
/*
* This function has to return -EDEADLK, but cannot hold
* exclusive_lock during the wait because some callers
* may already hold it. This means checking needs_reset without
* lock, and not fiddling with any gpu internals.
*
* The callback installed with fence_enable_sw_signaling will
* run before our wait_event_*timeout call, so we will see
* both the signaled fence and the changes to needs_reset.
*/
if (intr)
t = wait_event_interruptible_timeout(rdev->fence_queue,
((signaled = radeon_test_signaled(fence)) ||
rdev->needs_reset), t);
else
t = wait_event_timeout(rdev->fence_queue,
((signaled = radeon_test_signaled(fence)) ||
rdev->needs_reset), t);
if (t > 0 && !signaled)
return -EDEADLK;
return t;
}
const struct fence_ops radeon_fence_ops = {
.get_driver_name = radeon_fence_get_driver_name,
.get_timeline_name = radeon_fence_get_timeline_name,
.enable_signaling = radeon_fence_enable_signaling,
.signaled = radeon_fence_is_signaled,
.wait = radeon_fence_default_wait,
.release = NULL,
};
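/*
 * A minimal sketch of how radeon_fence_ops is expected to be hooked up
 * when a fence is emitted; the fence_context and per-ring seq arguments
 * are assumptions based on the common fence API of this kernel
 * generation, not a verbatim copy of the driver code:
 */
static void radeon_fence_init_sketch(struct radeon_device *rdev,
struct radeon_fence *fence, int ring, u64 seq)
{
/* tie the fence to its timeline: one fence context per ring */
fence_init(&fence->base, &radeon_fence_ops,
&rdev->fence_queue.lock,
rdev->fence_context + ring, seq);
}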

View File

@ -137,7 +137,7 @@ int radeon_gart_table_vram_alloc(struct radeon_device *rdev)
if (rdev->gart.robj == NULL) {
r = radeon_bo_create(rdev, rdev->gart.table_size,
PAGE_SIZE, true, RADEON_GEM_DOMAIN_VRAM,
0, NULL, NULL, &rdev->gart.robj);
if (r) {
return r;
}

View File

@ -65,7 +65,7 @@ int radeon_gem_object_create(struct radeon_device *rdev, unsigned long size,
retry:
r = radeon_bo_create(rdev, size, alignment, kernel, initial_domain,
flags, NULL, NULL, &robj);
if (r) {
if (r != -ERESTARTSYS) {
if (initial_domain == RADEON_GEM_DOMAIN_VRAM) {
@ -91,7 +91,7 @@ static int radeon_gem_set_domain(struct drm_gem_object *gobj,
{
struct radeon_bo *robj;
uint32_t domain;
long r;
/* FIXME: reimplement */
robj = gem_to_radeon_bo(gobj);
@ -229,9 +229,10 @@ int radeon_gem_set_domain_ioctl(struct drm_device *dev, void *data,
return r;
}
static int radeon_mode_mmap(struct drm_file *filp,
struct drm_device *dev,
uint32_t handle, bool dumb,
uint64_t *offset_p)
{
struct drm_gem_object *gobj;
struct radeon_bo *robj;
@ -240,6 +241,14 @@ int radeon_mode_dumb_mmap(struct drm_file *filp,
if (gobj == NULL) {
return -ENOENT;
}
/*
* We don't allow dumb mmaps on objects created using another
* interface.
*/
WARN_ONCE(dumb && !(gobj->dumb || gobj->import_attach),
"Illegal dumb map of GPU buffer.\n");
robj = gem_to_radeon_bo(gobj);
*offset_p = radeon_bo_mmap_offset(robj);
drm_gem_object_unreference_unlocked(gobj);
@ -251,7 +260,8 @@ int radeon_gem_mmap_ioctl(struct drm_device *dev, void *data,
{
struct drm_radeon_gem_mmap *args = data;
return radeon_mode_mmap(filp, dev, args->handle, false,
&args->addr_ptr);
}
int radeon_gem_busy_ioctl(struct drm_device *dev, void *data,
@ -283,8 +293,9 @@ int radeon_gem_wait_idle_ioctl(struct drm_device *dev, void *data,
struct drm_radeon_gem_wait_idle *args = data;
struct drm_gem_object *gobj;
struct radeon_bo *robj;
int r = 0;
uint32_t cur_placement = 0;
long ret;
gobj = drm_gem_object_lookup(dev, filp, args->handle);
if (gobj == NULL) {

View File

@ -64,10 +64,7 @@ int radeon_ib_get(struct radeon_device *rdev, int ring,
return r;
}
radeon_sync_create(&ib->sync);
ib->ring = ring;
ib->fence = NULL;
@ -96,7 +93,7 @@ int radeon_ib_get(struct radeon_device *rdev, int ring,
*/
void radeon_ib_free(struct radeon_device *rdev, struct radeon_ib *ib)
{
radeon_sync_free(rdev, &ib->sync, ib->fence);
radeon_sa_bo_free(rdev, &ib->sa_bo, ib->fence);
radeon_fence_unref(&ib->fence);
}
@ -145,11 +142,11 @@ int radeon_ib_schedule(struct radeon_device *rdev, struct radeon_ib *ib,
if (ib->vm) {
struct radeon_fence *vm_id_fence;
vm_id_fence = radeon_vm_grab_id(rdev, ib->vm, ib->ring);
radeon_sync_fence(&ib->sync, vm_id_fence);
}
/* sync with other rings */
r = radeon_sync_rings(rdev, &ib->sync, ib->ring);
if (r) {
dev_err(rdev->dev, "failed to sync rings (%d)\n", r);
radeon_ring_unlock_undo(rdev, ring);
@ -157,11 +154,12 @@ int radeon_ib_schedule(struct radeon_device *rdev, struct radeon_ib *ib,
}
if (ib->vm)
radeon_vm_flush(rdev, ib->vm, ib->ring,
ib->sync.last_vm_update);
if (const_ib) {
radeon_ring_ib_execute(rdev, const_ib->ring, const_ib);
radeon_sync_free(rdev, &const_ib->sync, NULL);
}
radeon_ring_ib_execute(rdev, ib->ring, ib);
r = radeon_fence_emit(rdev, &ib->fence, ib->ring);
@ -269,6 +267,7 @@ int radeon_ib_ring_tests(struct radeon_device *rdev)
r = radeon_ib_test(rdev, i, ring);
if (r) {
radeon_fence_driver_force_completion(rdev, i);
ring->ready = false;
rdev->needs_reset = false;

View File

@ -205,6 +205,21 @@ void radeon_irq_kms_sw_irq_get(struct radeon_device *rdev, int ring)
}
}
/**
* radeon_irq_kms_sw_irq_get_delayed - enable software interrupt
*
* @rdev: radeon device pointer
* @ring: ring whose interrupt you want to enable
*
* Enables the software interrupt for a specific ring (all asics).
* The software interrupt is generally used to signal a fence on
* a particular ring.
*/
bool radeon_irq_kms_sw_irq_get_delayed(struct radeon_device *rdev, int ring)
{
return atomic_inc_return(&rdev->irq.ring_int[ring]) == 1;
}
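/*
 * Sketch of the intended caller: fence_enable_signaling() runs with the
 * fence spinlock held and therefore cannot take exclusive_lock, so it
 * only bumps the refcount here and defers the actual interrupt setup.
 * The delayed_irq flag is an assumption about the fence driver
 * bookkeeping that consumes this return value:
 *
 *   if (radeon_irq_kms_sw_irq_get_delayed(rdev, fence->ring))
 *       rdev->fence_drv[fence->ring].delayed_irq = true;
 */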
/**
* radeon_irq_kms_sw_irq_put - disable software interrupt
*

View File

@ -0,0 +1,47 @@
/*
* Copyright 2014 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
/*
* radeon_kfd.h defines the private interface between the
* AMD kernel graphics drivers and the AMD KFD.
*/
#ifndef RADEON_KFD_H_INCLUDED
#define RADEON_KFD_H_INCLUDED
#include <linux/types.h>
//#include "../amd/include/kgd_kfd_interface.h"
struct radeon_device;
bool radeon_kfd_init(void);
void radeon_kfd_fini(void);
void radeon_kfd_suspend(struct radeon_device *rdev);
int radeon_kfd_resume(struct radeon_device *rdev);
void radeon_kfd_interrupt(struct radeon_device *rdev,
const void *ih_ring_entry);
void radeon_kfd_device_probe(struct radeon_device *rdev);
void radeon_kfd_device_init(struct radeon_device *rdev);
void radeon_kfd_device_fini(struct radeon_device *rdev);
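/*
 * Expected call flow for this interface (a sketch; the exact call sites
 * in the drm/pci glue are assumptions):
 *   module load:      radeon_kfd_init()
 *   device probe:     radeon_kfd_device_probe(rdev), then
 *                     radeon_kfd_device_init(rdev)
 *   interrupt path:   radeon_kfd_interrupt(rdev, ih_ring_entry)
 *   power management: radeon_kfd_suspend(rdev) / radeon_kfd_resume(rdev)
 *   teardown:         radeon_kfd_device_fini(rdev), radeon_kfd_fini()
 */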
#endif /* RADEON_KFD_H_INCLUDED */

View File

@ -321,6 +321,10 @@ struct radeon_crtc {
uint32_t crtc_offset;
struct drm_gem_object *cursor_bo;
uint64_t cursor_addr;
int cursor_x;
int cursor_y;
int cursor_hot_x;
int cursor_hot_y;
int cursor_width;
int cursor_height;
int max_cursor_width;
@ -462,6 +466,7 @@ struct radeon_gpio_rec {
u8 id;
u32 reg;
u32 mask;
u32 shift;
};
struct radeon_hpd {
@ -748,6 +753,8 @@ extern bool radeon_atombios_get_ppll_ss_info(struct radeon_device *rdev,
extern bool radeon_atombios_get_asic_ss_info(struct radeon_device *rdev,
struct radeon_atom_ss *ss,
int id, u32 clock);
extern struct radeon_gpio_rec radeon_atombios_lookup_gpio(struct radeon_device *rdev,
u8 id);
extern void radeon_compute_pll_legacy(struct radeon_pll *pll,
uint64_t freq,
@ -777,6 +784,7 @@ extern void atombios_digital_setup(struct drm_encoder *encoder, int action);
extern int atombios_get_encoder_mode(struct drm_encoder *encoder);
extern bool atombios_set_edp_panel_power(struct drm_connector *connector, int action);
extern void radeon_encoder_set_active_device(struct drm_encoder *encoder);
extern bool radeon_encoder_is_digital(struct drm_encoder *encoder);
extern void radeon_crtc_load_lut(struct drm_crtc *crtc);
extern int atombios_crtc_set_base(struct drm_crtc *crtc, int x, int y,
@ -801,13 +809,16 @@ extern int radeon_crtc_set_base_atomic(struct drm_crtc *crtc,
extern int radeon_crtc_do_set_base(struct drm_crtc *crtc,
struct drm_framebuffer *fb,
int x, int y, int atomic);
extern int radeon_crtc_cursor_set(struct drm_crtc *crtc,
extern int radeon_crtc_cursor_set2(struct drm_crtc *crtc,
struct drm_file *file_priv,
uint32_t handle,
uint32_t width,
uint32_t height);
uint32_t height,
int32_t hot_x,
int32_t hot_y);
extern int radeon_crtc_cursor_move(struct drm_crtc *crtc,
int x, int y);
extern void radeon_cursor_reset(struct drm_crtc *crtc);
extern int radeon_get_crtc_scanoutpos(struct drm_device *dev, int crtc,
unsigned int flags,

View File

@ -96,40 +96,83 @@ void radeon_ttm_placement_from_domain(struct radeon_bo *rbo, u32 domain)
{
u32 c = 0, i;
rbo->placement.placement = rbo->placements;
rbo->placement.busy_placement = rbo->placements;
if (domain & RADEON_GEM_DOMAIN_VRAM) {
/* Try placing BOs which don't need CPU access outside of the
* CPU accessible part of VRAM
*/
if ((rbo->flags & RADEON_GEM_NO_CPU_ACCESS) &&
rbo->rdev->mc.visible_vram_size < rbo->rdev->mc.real_vram_size) {
rbo->placements[c].fpfn =
rbo->rdev->mc.visible_vram_size >> PAGE_SHIFT;
rbo->placements[c++].flags = TTM_PL_FLAG_WC |
TTM_PL_FLAG_UNCACHED |
TTM_PL_FLAG_VRAM;
}
rbo->placements[c].fpfn = 0;
rbo->placements[c++].flags = TTM_PL_FLAG_WC |
TTM_PL_FLAG_UNCACHED |
TTM_PL_FLAG_VRAM;
}
if (domain & RADEON_GEM_DOMAIN_GTT) {
if (rbo->flags & RADEON_GEM_GTT_UC) {
rbo->placements[c].fpfn = 0;
rbo->placements[c++].flags = TTM_PL_FLAG_UNCACHED |
TTM_PL_FLAG_TT;
} else if ((rbo->flags & RADEON_GEM_GTT_WC) ||
(rbo->rdev->flags & RADEON_IS_AGP)) {
rbo->placements[c].fpfn = 0;
rbo->placements[c++].flags = TTM_PL_FLAG_WC |
TTM_PL_FLAG_UNCACHED |
TTM_PL_FLAG_TT;
} else {
rbo->placements[c].fpfn = 0;
rbo->placements[c++].flags = TTM_PL_FLAG_CACHED |
TTM_PL_FLAG_TT;
}
}
if (domain & RADEON_GEM_DOMAIN_CPU) {
if (rbo->flags & RADEON_GEM_GTT_UC) {
rbo->placements[c].fpfn = 0;
rbo->placements[c++].flags = TTM_PL_FLAG_UNCACHED |
TTM_PL_FLAG_SYSTEM;
} else if ((rbo->flags & RADEON_GEM_GTT_WC) ||
rbo->rdev->flags & RADEON_IS_AGP) {
rbo->placements[c].fpfn = 0;
rbo->placements[c++].flags = TTM_PL_FLAG_WC |
TTM_PL_FLAG_UNCACHED |
TTM_PL_FLAG_SYSTEM;
} else {
rbo->placements[c].fpfn = 0;
rbo->placements[c++].flags = TTM_PL_FLAG_CACHED |
TTM_PL_FLAG_SYSTEM;
}
}
if (!c) {
rbo->placements[c].fpfn = 0;
rbo->placements[c++].flags = TTM_PL_MASK_CACHING |
TTM_PL_FLAG_SYSTEM;
}
rbo->placement.num_placement = c;
rbo->placement.num_busy_placement = c;
for (i = 0; i < c; ++i) {
if ((rbo->flags & RADEON_GEM_CPU_ACCESS) &&
(rbo->placements[i].flags & TTM_PL_FLAG_VRAM) &&
!rbo->placements[i].fpfn)
rbo->placements[i].lpfn =
rbo->rdev->mc.visible_vram_size >> PAGE_SHIFT;
else
rbo->placements[i].lpfn = 0;
}
/*
* Use two-ended allocation depending on the buffer size to
* improve fragmentation quality.
@ -137,14 +180,16 @@ void radeon_ttm_placement_from_domain(struct radeon_bo *rbo, u32 domain)
*/
if (rbo->tbo.mem.size > 512 * 1024) {
for (i = 0; i < c; i++) {
rbo->placements[i].flags |= TTM_PL_FLAG_TOPDOWN;
}
}
}
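/*
 * Example: a 1 MB BO exceeds the 512 KB threshold above, so every one
 * of its placements gets TTM_PL_FLAG_TOPDOWN and the BO is allocated
 * from the top of its domain, leaving the bottom of VRAM/GTT for small,
 * short-lived allocations.
 */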
int radeon_bo_create(struct radeon_device *rdev,
unsigned long size, int byte_align, bool kernel,
u32 domain, u32 flags, struct sg_table *sg,
struct reservation_object *resv,
struct radeon_bo **bo_ptr)
{
struct radeon_bo *bo;
enum ttm_bo_type type;
@ -187,18 +232,19 @@ int radeon_bo_create(struct radeon_device *rdev,
if (!(rdev->flags & RADEON_IS_PCIE))
bo->flags &= ~(RADEON_GEM_GTT_WC | RADEON_GEM_GTT_UC);
// printf("%s rdev->flags %x bo->flags %x\n",
// __FUNCTION__, bo->flags);
if (flags & RADEON_GEM_GTT_WC)
bo->flags &= ~RADEON_GEM_GTT_WC;
#ifdef CONFIG_X86_32
/* XXX: Write-combined CPU mappings of GTT seem broken on 32-bit
* See https://bugs.freedesktop.org/show_bug.cgi?id=84627
*/
bo->flags &= ~RADEON_GEM_GTT_WC;
#endif
radeon_ttm_placement_from_domain(bo, domain);
/* Kernel allocations are uninterruptible */
// down_read(&rdev->pm.mclk_lock);
r = ttm_bo_init(&rdev->mman.bdev, &bo->tbo, size, type,
&bo->placement, page_align, !kernel, NULL,
acc_size, sg, resv, &radeon_ttm_bo_destroy);
// up_read(&rdev->pm.mclk_lock);
if (unlikely(r != 0)) {
return r;
@ -289,21 +335,19 @@ int radeon_bo_pin_restricted(struct radeon_bo *bo, u32 domain, u64 max_offset,
return 0;
}
radeon_ttm_placement_from_domain(bo, domain);
for (i = 0; i < bo->placement.num_placement; i++) {
/* force to pin into visible video ram */
if ((bo->placements[i].flags & TTM_PL_FLAG_VRAM) &&
!(bo->flags & RADEON_GEM_NO_CPU_ACCESS) &&
(!max_offset || max_offset > bo->rdev->mc.visible_vram_size))
bo->placements[i].lpfn =
bo->rdev->mc.visible_vram_size >> PAGE_SHIFT;
else
bo->placements[i].lpfn = max_offset >> PAGE_SHIFT;
bo->placements[i].flags |= TTM_PL_FLAG_NO_EVICT;
}
r = ttm_bo_validate(&bo->tbo, &bo->placement, false, false);
if (likely(r == 0)) {
bo->pin_count = 1;
@ -335,8 +379,10 @@ int radeon_bo_unpin(struct radeon_bo *bo)
bo->pin_count--;
if (bo->pin_count)
return 0;
for (i = 0; i < bo->placement.num_placement; i++) {
bo->placements[i].lpfn = 0;
bo->placements[i].flags &= ~TTM_PL_FLAG_NO_EVICT;
}
r = ttm_bo_validate(&bo->tbo, &bo->placement, false, false);
if (likely(r == 0)) {
if (bo->tbo.mem.mem_type == TTM_PL_VRAM)
@ -422,24 +468,29 @@ int radeon_bo_list_validate(struct radeon_device *rdev,
struct ww_acquire_ctx *ticket,
struct list_head *head, int ring)
{
struct radeon_bo_list *lobj;
struct list_head duplicates;
int r;
u64 bytes_moved = 0, initial_bytes_moved;
u64 bytes_moved_threshold = radeon_bo_get_threshold_for_moves(rdev);
INIT_LIST_HEAD(&duplicates);
r = ttm_eu_reserve_buffers(ticket, head, true, &duplicates);
if (unlikely(r != 0)) {
return r;
}
list_for_each_entry(lobj, head, tv.head) {
struct radeon_bo *bo = lobj->robj;
if (!bo->pin_count) {
u32 domain = lobj->prefered_domains;
u32 allowed = lobj->allowed_domains;
u32 current_domain =
radeon_mem_type_to_domain(bo->tbo.mem.mem_type);
WARN_ONCE(bo->gem_base.dumb,
"GPU use of dumb buffer is illegal.\n");
/* Check if this buffer will be moved and don't move it
* if we have moved too many buffers for this IB already.
*
@ -448,7 +499,7 @@ int radeon_bo_list_validate(struct radeon_device *rdev,
* into account. We don't want to disallow buffer moves
* completely.
*/
if ((allowed & current_domain) != 0 &&
(domain & current_domain) == 0 && /* will be moved */
bytes_moved > bytes_moved_threshold) {
/* don't move it */
@ -458,7 +509,7 @@ int radeon_bo_list_validate(struct radeon_device *rdev,
retry:
radeon_ttm_placement_from_domain(bo, domain);
if (ring == R600_RING_TYPE_UVD_INDEX)
radeon_uvd_force_into_uvd_segment(bo, allowed);
initial_bytes_moved = atomic64_read(&rdev->num_bytes_moved);
r = ttm_bo_validate(&bo->tbo, &bo->placement, true, false);
@ -478,6 +529,12 @@ int radeon_bo_list_validate(struct radeon_device *rdev,
lobj->gpu_offset = radeon_bo_gpu_offset(bo);
lobj->tiling_flags = bo->tiling_flags;
}
list_for_each_entry(lobj, &duplicates, tv.head) {
lobj->gpu_offset = radeon_bo_gpu_offset(lobj->robj);
lobj->tiling_flags = lobj->robj->tiling_flags;
}
return 0;
}
@ -678,12 +735,29 @@ int radeon_bo_wait(struct radeon_bo *bo, u32 *mem_type, bool no_wait)
r = ttm_bo_reserve(&bo->tbo, true, no_wait, false, NULL);
if (unlikely(r != 0))
return r;
if (mem_type)
*mem_type = bo->tbo.mem.mem_type;
r = ttm_bo_wait(&bo->tbo, true, true, no_wait);
ttm_bo_unreserve(&bo->tbo);
return r;
}
/**
* radeon_bo_fence - add fence to buffer object
*
* @bo: buffer object in question
* @fence: fence to add
* @shared: true if fence should be added shared
*
*/
void radeon_bo_fence(struct radeon_bo *bo, struct radeon_fence *fence,
bool shared)
{
struct reservation_object *resv = bo->tbo.resv;
if (shared)
reservation_object_add_shared_fence(resv, &fence->base);
else
reservation_object_add_excl_fence(resv, &fence->base);
}
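/*
 * Usage sketch: writers attach their fence exclusively, shared updaters
 * attach theirs shared. This matches the VM code in this change, e.g.
 * after scheduling a page-directory update IB:
 *
 *   ib.fence->is_vm_update = true;
 *   radeon_bo_fence(pd, ib.fence, false);  (exclusive: pd is written)
 *
 * while radeon_vm_fence_pts() adds shared fences to the page tables.
 */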

View File

@ -126,6 +126,7 @@ extern int radeon_bo_create(struct radeon_device *rdev,
unsigned long size, int byte_align,
bool kernel, u32 domain, u32 flags,
struct sg_table *sg,
struct reservation_object *resv,
struct radeon_bo **bo_ptr);
extern int radeon_bo_kmap(struct radeon_bo *bo, void **ptr);
extern void radeon_bo_kunmap(struct radeon_bo *bo);
@ -154,6 +155,8 @@ extern void radeon_bo_move_notify(struct ttm_buffer_object *bo,
struct ttm_mem_reg *new_mem);
extern int radeon_bo_fault_reserve_notify(struct ttm_buffer_object *bo);
extern int radeon_bo_get_surface_reg(struct radeon_bo *bo);
extern void radeon_bo_fence(struct radeon_bo *bo, struct radeon_fence *fence,
bool shared);
/*
* sub allocation

View File

@ -1479,7 +1479,7 @@ static bool radeon_pm_in_vbl(struct radeon_device *rdev)
if (rdev->pm.active_crtcs & (1 << crtc)) {
vbl_status = radeon_get_crtc_scanoutpos(rdev->ddev, crtc, 0, &vpos, &hpos, NULL, NULL);
if ((vbl_status & DRM_SCANOUTPOS_VALID) &&
!(vbl_status & DRM_SCANOUTPOS_IN_VBLANK))
in_vbl = false;
}
}

View File

@ -44,27 +44,6 @@
*/
static int radeon_debugfs_ring_init(struct radeon_device *rdev, struct radeon_ring *ring);
/**
* radeon_ring_write - write a value to the ring
*
* @ring: radeon_ring structure holding ring information
* @v: dword (dw) value to write
*
* Write a value to the requested ring buffer (all asics).
*/
void radeon_ring_write(struct radeon_ring *ring, uint32_t v)
{
#if DRM_DEBUG_CODE
if (ring->count_dw <= 0) {
DRM_ERROR("radeon: writing more dwords to the ring than expected!\n");
}
#endif
ring->ring[ring->wptr++] = v;
ring->wptr &= ring->ptr_mask;
ring->count_dw--;
ring->ring_free_dw--;
}
/**
* radeon_ring_supports_scratch_reg - check if the ring supports
* writing to scratch registers
@ -404,7 +383,7 @@ int radeon_ring_init(struct radeon_device *rdev, struct radeon_ring *ring, unsig
/* Allocate ring buffer */
if (ring->ring_obj == NULL) {
r = radeon_bo_create(rdev, ring->ring_size, PAGE_SIZE, true,
RADEON_GEM_DOMAIN_GTT, 0,
RADEON_GEM_DOMAIN_GTT, 0, NULL,
NULL, &ring->ring_obj);
if (r) {
dev_err(rdev->dev, "(%d) ring create failed\n", r);

View File

@ -65,7 +65,7 @@ int radeon_sa_bo_manager_init(struct radeon_device *rdev,
}
r = radeon_bo_create(rdev, size, align, true,
domain, flags, NULL, NULL, &sa_manager->bo);
if (r) {
dev_err(rdev->dev, "(%d) failed to allocate bo for manager\n", r);
return r;

View File

@ -34,15 +34,14 @@
int radeon_semaphore_create(struct radeon_device *rdev,
struct radeon_semaphore **semaphore)
{
int r;
*semaphore = kmalloc(sizeof(struct radeon_semaphore), GFP_KERNEL);
if (*semaphore == NULL) {
return -ENOMEM;
}
r = radeon_sa_bo_new(rdev, &rdev->ring_tmp_bo,
&(*semaphore)->sa_bo, 8, 8);
if (r) {
kfree(*semaphore);
*semaphore = NULL;
@ -51,12 +50,7 @@ int radeon_semaphore_create(struct radeon_device *rdev,
(*semaphore)->waiters = 0;
(*semaphore)->gpu_addr = radeon_sa_bo_gpu_addr((*semaphore)->sa_bo);
*((uint64_t *)radeon_sa_bo_cpu_addr((*semaphore)->sa_bo)) = 0;
return 0;
}
@ -95,99 +89,6 @@ bool radeon_semaphore_emit_wait(struct radeon_device *rdev, int ridx,
return false;
}
/**
* radeon_semaphore_sync_to - use the semaphore to sync to a fence
*
* @semaphore: semaphore object to add fence to
* @fence: fence to sync to
*
* Sync to the fence using this semaphore object
*/
void radeon_semaphore_sync_to(struct radeon_semaphore *semaphore,
struct radeon_fence *fence)
{
struct radeon_fence *other;
if (!fence)
return;
other = semaphore->sync_to[fence->ring];
semaphore->sync_to[fence->ring] = radeon_fence_later(fence, other);
}
/**
* radeon_semaphore_sync_rings - sync ring to all registered fences
*
* @rdev: radeon_device pointer
* @semaphore: semaphore object to use for sync
* @ring: ring that needs sync
*
* Ensure that all registered fences are signaled before letting
* the ring continue. The caller must hold the ring lock.
*/
int radeon_semaphore_sync_rings(struct radeon_device *rdev,
struct radeon_semaphore *semaphore,
int ring)
{
unsigned count = 0;
int i, r;
for (i = 0; i < RADEON_NUM_RINGS; ++i) {
struct radeon_fence *fence = semaphore->sync_to[i];
/* check if we really need to sync */
if (!radeon_fence_need_sync(fence, ring))
continue;
/* prevent GPU deadlocks */
if (!rdev->ring[i].ready) {
dev_err(rdev->dev, "Syncing to a disabled ring!");
return -EINVAL;
}
if (++count > RADEON_NUM_SYNCS) {
/* not enough room, wait manually */
r = radeon_fence_wait(fence, false);
if (r)
return r;
continue;
}
/* allocate enough space for sync command */
r = radeon_ring_alloc(rdev, &rdev->ring[i], 16);
if (r) {
return r;
}
/* emit the signal semaphore */
if (!radeon_semaphore_emit_signal(rdev, i, semaphore)) {
/* signaling wasn't successful, wait manually */
radeon_ring_undo(&rdev->ring[i]);
r = radeon_fence_wait(fence, false);
if (r)
return r;
continue;
}
/* we assume caller has already allocated space on waiters ring */
if (!radeon_semaphore_emit_wait(rdev, ring, semaphore)) {
/* waiting wasn't successful, wait manually */
radeon_ring_undo(&rdev->ring[i]);
r = radeon_fence_wait(fence, false);
if (r)
return r;
continue;
}
radeon_ring_commit(rdev, &rdev->ring[i], false);
radeon_fence_note_sync(fence, ring);
semaphore->gpu_addr += 8;
}
return 0;
}
void radeon_semaphore_free(struct radeon_device *rdev,
struct radeon_semaphore **semaphore,
struct radeon_fence *fence)

View File

@ -0,0 +1,220 @@
/*
* Copyright 2014 Advanced Micro Devices, Inc.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
*/
/*
* Authors:
* Christian König <christian.koenig@amd.com>
*/
#include <drm/drmP.h>
#include "radeon.h"
#include "radeon_trace.h"
/**
* radeon_sync_create - zero init sync object
*
* @sync: sync object to initialize
*
* Just clear the sync object for now.
*/
void radeon_sync_create(struct radeon_sync *sync)
{
unsigned i;
for (i = 0; i < RADEON_NUM_SYNCS; ++i)
sync->semaphores[i] = NULL;
for (i = 0; i < RADEON_NUM_RINGS; ++i)
sync->sync_to[i] = NULL;
sync->last_vm_update = NULL;
}
/**
* radeon_sync_fence - use the semaphore to sync to a fence
*
* @sync: sync object to add fence to
* @fence: fence to sync to
*
* Sync to the fence using the semaphore objects
*/
void radeon_sync_fence(struct radeon_sync *sync,
struct radeon_fence *fence)
{
struct radeon_fence *other;
if (!fence)
return;
other = sync->sync_to[fence->ring];
sync->sync_to[fence->ring] = radeon_fence_later(fence, other);
if (fence->is_vm_update) {
other = sync->last_vm_update;
sync->last_vm_update = radeon_fence_later(fence, other);
}
}
/**
* radeon_sync_resv - use the semaphores to sync to a reservation object
*
* @sync: sync object to add fences from reservation object to
* @resv: reservation object with embedded fence
* @shared: true if we should only sync to the exclusive fence
*
* Sync to the fence using the semaphore objects
*/
int radeon_sync_resv(struct radeon_device *rdev,
struct radeon_sync *sync,
struct reservation_object *resv,
bool shared)
{
struct reservation_object_list *flist;
struct fence *f;
struct radeon_fence *fence;
unsigned i;
int r = 0;
/* always sync to the exclusive fence */
f = reservation_object_get_excl(resv);
fence = f ? to_radeon_fence(f) : NULL;
if (fence && fence->rdev == rdev)
radeon_sync_fence(sync, fence);
else if (f)
r = fence_wait(f, true);
flist = reservation_object_get_list(resv);
if (shared || !flist || r)
return r;
for (i = 0; i < flist->shared_count; ++i) {
f = rcu_dereference_protected(flist->shared[i],
reservation_object_held(resv));
fence = to_radeon_fence(f);
if (fence && fence->rdev == rdev)
radeon_sync_fence(sync, fence);
else
r = fence_wait(f, true);
if (r)
break;
}
return r;
}
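/*
 * Usage sketch (as in the VM update paths of this change): collect the
 * fences of a BO's reservation object into the IB's sync object before
 * scheduling, so the ring waits for prior users of the BO:
 *
 *   radeon_sync_resv(rdev, &ib.sync, pd->tbo.resv, true);
 */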
/**
* radeon_sync_rings - sync ring to all registered fences
*
* @rdev: radeon_device pointer
* @sync: sync object to use
* @ring: ring that needs sync
*
* Ensure that all registered fences are signaled before letting
* the ring continue. The caller must hold the ring lock.
*/
int radeon_sync_rings(struct radeon_device *rdev,
struct radeon_sync *sync,
int ring)
{
unsigned count = 0;
int i, r;
for (i = 0; i < RADEON_NUM_RINGS; ++i) {
struct radeon_fence *fence = sync->sync_to[i];
struct radeon_semaphore *semaphore;
/* check if we really need to sync */
if (!radeon_fence_need_sync(fence, ring))
continue;
/* prevent GPU deadlocks */
if (!rdev->ring[i].ready) {
dev_err(rdev->dev, "Syncing to a disabled ring!");
return -EINVAL;
}
if (count >= RADEON_NUM_SYNCS) {
/* not enough room, wait manually */
r = radeon_fence_wait(fence, false);
if (r)
return r;
continue;
}
r = radeon_semaphore_create(rdev, &semaphore);
if (r)
return r;
sync->semaphores[count++] = semaphore;
/* allocate enough space for sync command */
r = radeon_ring_alloc(rdev, &rdev->ring[i], 16);
if (r)
return r;
/* emit the signal semaphore */
if (!radeon_semaphore_emit_signal(rdev, i, semaphore)) {
/* signaling wasn't successful, wait manually */
radeon_ring_undo(&rdev->ring[i]);
r = radeon_fence_wait(fence, false);
if (r)
return r;
continue;
}
/* we assume caller has already allocated space on waiters ring */
if (!radeon_semaphore_emit_wait(rdev, ring, semaphore)) {
/* waiting wasn't successful, wait manually */
radeon_ring_undo(&rdev->ring[i]);
r = radeon_fence_wait(fence, false);
if (r)
return r;
continue;
}
radeon_ring_commit(rdev, &rdev->ring[i], false);
radeon_fence_note_sync(fence, ring);
}
return 0;
}
/**
* radeon_sync_free - free the sync object
*
* @rdev: radeon_device pointer
* @sync: sync object to use
* @fence: fence to use for the free
*
* Free the sync object by freeing all semaphores in it.
*/
void radeon_sync_free(struct radeon_device *rdev,
struct radeon_sync *sync,
struct radeon_fence *fence)
{
unsigned i;
for (i = 0; i < RADEON_NUM_SYNCS; ++i)
radeon_semaphore_free(rdev, &sync->semaphores[i], fence);
}
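/*
 * Lifecycle sketch for the sync object, mirroring how radeon_ib_get()
 * and radeon_ib_schedule() use it elsewhere in this change; the helper
 * below is illustrative only:
 */
static int radeon_sync_usage_sketch(struct radeon_device *rdev,
struct radeon_ib *ib,
struct radeon_fence *vm_id_fence)
{
int r;
radeon_sync_create(&ib->sync); /* zero-init semaphores and fences */
radeon_sync_fence(&ib->sync, vm_id_fence); /* remember a fence */
r = radeon_sync_rings(rdev, &ib->sync, ib->ring); /* emit waits */
if (r)
return r;
/* ... execute the IB and emit ib->fence ... */
radeon_sync_free(rdev, &ib->sync, ib->fence); /* free with fence */
return 0;
}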

View File

@ -67,7 +67,7 @@ static void radeon_do_test_moves(struct radeon_device *rdev, int flag)
}
r = radeon_bo_create(rdev, size, PAGE_SIZE, true, RADEON_GEM_DOMAIN_VRAM,
0, NULL, NULL, &vram_obj);
if (r) {
DRM_ERROR("Failed to create VRAM object\n");
goto out_cleanup;
@ -87,7 +87,8 @@ static void radeon_do_test_moves(struct radeon_device *rdev, int flag)
struct radeon_fence *fence = NULL;
r = radeon_bo_create(rdev, size, PAGE_SIZE, true,
RADEON_GEM_DOMAIN_GTT, 0, NULL, NULL,
gtt_obj + i);
if (r) {
DRM_ERROR("Failed to create GTT object %d\n", i);
goto out_lclean;
@ -116,11 +117,16 @@ static void radeon_do_test_moves(struct radeon_device *rdev, int flag)
radeon_bo_kunmap(gtt_obj[i]);
if (ring == R600_RING_TYPE_DMA_INDEX)
fence = radeon_copy_dma(rdev, gtt_addr, vram_addr,
size / RADEON_GPU_PAGE_SIZE,
NULL);
else
fence = radeon_copy_blit(rdev, gtt_addr, vram_addr,
size / RADEON_GPU_PAGE_SIZE,
NULL);
if (IS_ERR(fence)) {
DRM_ERROR("Failed GTT->VRAM copy %d\n", i);
r = PTR_ERR(fence);
goto out_lclean_unpin;
}
@ -162,11 +168,16 @@ static void radeon_do_test_moves(struct radeon_device *rdev, int flag)
radeon_bo_kunmap(vram_obj);
if (ring == R600_RING_TYPE_DMA_INDEX)
fence = radeon_copy_dma(rdev, vram_addr, gtt_addr,
size / RADEON_GPU_PAGE_SIZE,
NULL);
else
fence = radeon_copy_blit(rdev, vram_addr, gtt_addr,
size / RADEON_GPU_PAGE_SIZE,
NULL);
if (IS_ERR(fence)) {
DRM_ERROR("Failed VRAM->GTT copy %d\n", i);
r = PTR_ERR(fence);
goto out_lclean_unpin;
}
@ -222,7 +233,7 @@ out_lclean:
radeon_bo_unreserve(gtt_obj[i]);
radeon_bo_unref(&gtt_obj[i]);
}
if (fence)
if (fence && !IS_ERR(fence))
radeon_fence_unref(&fence);
break;
}

View File

@ -166,12 +166,15 @@ static int radeon_init_mem_type(struct ttm_bo_device *bdev, uint32_t type,
static void radeon_evict_flags(struct ttm_buffer_object *bo,
struct ttm_placement *placement)
{
static struct ttm_place placements = {
.fpfn = 0,
.lpfn = 0,
.flags = TTM_PL_MASK_CACHING | TTM_PL_FLAG_SYSTEM
};
struct radeon_bo *rbo;
if (!radeon_ttm_bo_is_radeon_bo(bo)) {
placement->placement = &placements;
placement->busy_placement = &placements;
placement->num_placement = 1;
@ -181,9 +184,32 @@ static void radeon_evict_flags(struct ttm_buffer_object *bo,
rbo = container_of(bo, struct radeon_bo, tbo);
switch (bo->mem.mem_type) {
case TTM_PL_VRAM:
if (rbo->rdev->ring[radeon_copy_ring_index(rbo->rdev)].ready == false)
radeon_ttm_placement_from_domain(rbo, RADEON_GEM_DOMAIN_CPU);
else if (rbo->rdev->mc.visible_vram_size < rbo->rdev->mc.real_vram_size &&
bo->mem.start < (rbo->rdev->mc.visible_vram_size >> PAGE_SHIFT)) {
unsigned fpfn = rbo->rdev->mc.visible_vram_size >> PAGE_SHIFT;
int i;
/* Try evicting to the CPU inaccessible part of VRAM
* first, but only set GTT as busy placement, so this
* BO will be evicted to GTT rather than causing other
* BOs to be evicted from VRAM
*/
radeon_ttm_placement_from_domain(rbo, RADEON_GEM_DOMAIN_VRAM |
RADEON_GEM_DOMAIN_GTT);
rbo->placement.num_busy_placement = 0;
for (i = 0; i < rbo->placement.num_placement; i++) {
if (rbo->placements[i].flags & TTM_PL_FLAG_VRAM) {
if (rbo->placements[0].fpfn < fpfn)
rbo->placements[0].fpfn = fpfn;
} else {
rbo->placement.busy_placement =
&rbo->placements[i];
rbo->placement.num_busy_placement = 1;
}
}
} else
radeon_ttm_placement_from_domain(rbo, RADEON_GEM_DOMAIN_GTT);
break;
case TTM_PL_TT:
@ -216,6 +242,7 @@ static int radeon_move_blit(struct ttm_buffer_object *bo,
struct radeon_device *rdev;
uint64_t old_start, new_start;
struct radeon_fence *fence;
unsigned num_pages;
int r, ridx;
rdev = radeon_get_rdev(bo->bdev);
@ -252,13 +279,12 @@ static int radeon_move_blit(struct ttm_buffer_object *bo,
BUILD_BUG_ON((PAGE_SIZE % RADEON_GPU_PAGE_SIZE) != 0);
num_pages = new_mem->num_pages * (PAGE_SIZE / RADEON_GPU_PAGE_SIZE);
fence = radeon_copy(rdev, old_start, new_start, num_pages, bo->resv);
if (IS_ERR(fence))
return PTR_ERR(fence);
r = ttm_bo_move_accel_cleanup(bo, &fence->base,
evict, no_wait_gpu, new_mem);
radeon_fence_unref(&fence);
return r;
@ -272,20 +298,20 @@ static int radeon_move_vram_ram(struct ttm_buffer_object *bo,
struct radeon_device *rdev;
struct ttm_mem_reg *old_mem = &bo->mem;
struct ttm_mem_reg tmp_mem;
struct ttm_place placements;
struct ttm_placement placement;
int r;
rdev = radeon_get_rdev(bo->bdev);
tmp_mem = *new_mem;
tmp_mem.mm_node = NULL;
placement.num_placement = 1;
placement.placement = &placements;
placement.num_busy_placement = 1;
placement.busy_placement = &placements;
placements.fpfn = 0;
placements.lpfn = 0;
placements.flags = TTM_PL_MASK_CACHING | TTM_PL_FLAG_TT;
r = ttm_bo_mem_space(bo, &placement, &tmp_mem,
interruptible, no_wait_gpu);
if (unlikely(r)) {
@ -320,19 +346,19 @@ static int radeon_move_ram_vram(struct ttm_buffer_object *bo,
struct ttm_mem_reg *old_mem = &bo->mem;
struct ttm_mem_reg tmp_mem;
struct ttm_placement placement;
struct ttm_place placements;
int r;
rdev = radeon_get_rdev(bo->bdev);
tmp_mem = *new_mem;
tmp_mem.mm_node = NULL;
placement.num_placement = 1;
placement.placement = &placements;
placement.num_busy_placement = 1;
placement.busy_placement = &placements;
placements.fpfn = 0;
placements.lpfn = 0;
placements.flags = TTM_PL_MASK_CACHING | TTM_PL_FLAG_TT;
r = ttm_bo_mem_space(bo, &placement, &tmp_mem,
interruptible, no_wait_gpu);
if (unlikely(r)) {
@ -471,31 +497,6 @@ static void radeon_ttm_io_mem_free(struct ttm_bo_device *bdev, struct ttm_mem_re
{
}
static int radeon_sync_obj_wait(void *sync_obj, bool lazy, bool interruptible)
{
return radeon_fence_wait((struct radeon_fence *)sync_obj, interruptible);
}
static int radeon_sync_obj_flush(void *sync_obj)
{
return 0;
}
static void radeon_sync_obj_unref(void **sync_obj)
{
radeon_fence_unref((struct radeon_fence **)sync_obj);
}
static void *radeon_sync_obj_ref(void *sync_obj)
{
return radeon_fence_ref((struct radeon_fence *)sync_obj);
}
static bool radeon_sync_obj_signaled(void *sync_obj)
{
return radeon_fence_signaled((struct radeon_fence *)sync_obj);
}
/*
* TTM backend functions.
*/
@ -503,6 +504,10 @@ struct radeon_ttm_tt {
struct ttm_dma_tt ttm;
struct radeon_device *rdev;
u64 offset;
uint64_t userptr;
struct mm_struct *usermm;
uint32_t userflags;
};
static int radeon_ttm_backend_bind(struct ttm_tt *ttm,
@ -580,10 +585,17 @@ static struct ttm_tt *radeon_ttm_tt_create(struct ttm_bo_device *bdev,
return &gtt->ttm.ttm;
}
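/* Cast helper added by this change: returns NULL when the ttm_tt was
 * not created by this driver's backend (for instance an AGP-backed tt),
 * so the populate/unpopulate paths can skip radeon-specific handling.
 */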
static struct radeon_ttm_tt *radeon_ttm_tt_to_gtt(struct ttm_tt *ttm)
{
if (!ttm || ttm->func != &radeon_backend_func)
return NULL;
return (struct radeon_ttm_tt *)ttm;
}
static int radeon_ttm_tt_populate(struct ttm_tt *ttm)
{
struct radeon_ttm_tt *gtt = radeon_ttm_tt_to_gtt(ttm);
struct radeon_device *rdev;
struct radeon_ttm_tt *gtt = (void *)ttm;
unsigned i;
int r;
bool slave = !!(ttm->page_flags & TTM_PAGE_FLAG_SG);
@ -628,7 +640,7 @@ static int radeon_ttm_tt_populate(struct ttm_tt *ttm)
static void radeon_ttm_tt_unpopulate(struct ttm_tt *ttm)
{
struct radeon_device *rdev;
struct radeon_ttm_tt *gtt = (void *)ttm;
struct radeon_ttm_tt *gtt = radeon_ttm_tt_to_gtt(ttm);
unsigned i;
bool slave = !!(ttm->page_flags & TTM_PAGE_FLAG_SG);
@ -663,11 +675,6 @@ static struct ttm_bo_driver radeon_bo_driver = {
.evict_flags = &radeon_evict_flags,
.move = &radeon_bo_move,
.verify_access = &radeon_verify_access,
.sync_obj_signaled = &radeon_sync_obj_signaled,
.sync_obj_wait = &radeon_sync_obj_wait,
.sync_obj_flush = &radeon_sync_obj_flush,
.sync_obj_unref = &radeon_sync_obj_unref,
.sync_obj_ref = &radeon_sync_obj_ref,
.move_notify = &radeon_bo_move_notify,
// .fault_reserve_notify = &radeon_bo_fault_reserve_notify,
.io_mem_reserve = &radeon_ttm_io_mem_reserve,
@ -703,8 +710,8 @@ int radeon_ttm_init(struct radeon_device *rdev)
/* Change the size here instead of the init above so only lpfn is affected */
radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
r = radeon_bo_create(rdev, 16 * 1024 * 1024, PAGE_SIZE, true,
RADEON_GEM_DOMAIN_VRAM, 0, NULL,
NULL, &rdev->stollen_vga_memory);
if (r) {
return r;

View File

@ -46,6 +46,9 @@
#define FIRMWARE_TAHITI "radeon/TAHITI_uvd.bin"
#define FIRMWARE_BONAIRE "radeon/BONAIRE_uvd.bin"
MODULE_FIRMWARE(FIRMWARE_R600);
MODULE_FIRMWARE(FIRMWARE_RS780);
MODULE_FIRMWARE(FIRMWARE_RV770);
MODULE_FIRMWARE(FIRMWARE_RV710);
MODULE_FIRMWARE(FIRMWARE_CYPRESS);
MODULE_FIRMWARE(FIRMWARE_SUMO);
@ -115,9 +118,11 @@ int radeon_uvd_init(struct radeon_device *rdev)
}
bo_size = RADEON_GPU_PAGE_ALIGN(rdev->uvd_fw->size + 8) +
RADEON_UVD_STACK_SIZE + RADEON_UVD_HEAP_SIZE +
RADEON_GPU_PAGE_SIZE;
r = radeon_bo_create(rdev, bo_size, PAGE_SIZE, true,
RADEON_GEM_DOMAIN_VRAM, 0, NULL,
NULL, &rdev->uvd.vcpu_bo);
if (r) {
dev_err(rdev->dev, "(%d) failed to allocate UVD bo\n", r);
return r;
@ -231,10 +236,30 @@ int radeon_uvd_resume(struct radeon_device *rdev)
return 0;
}
void radeon_uvd_force_into_uvd_segment(struct radeon_bo *rbo,
uint32_t allowed_domains)
{
int i;
for (i = 0; i < rbo->placement.num_placement; ++i) {
rbo->placements[i].fpfn = 0 >> PAGE_SHIFT;
rbo->placements[i].lpfn = (256 * 1024 * 1024) >> PAGE_SHIFT;
}
/* If it must be in VRAM it must be in the first segment as well */
if (allowed_domains == RADEON_GEM_DOMAIN_VRAM)
return;
/* abort if we already have more than one placement */
if (rbo->placement.num_placement > 1)
return;
/* add another 256MB segment */
rbo->placements[1] = rbo->placements[0];
rbo->placements[1].fpfn += (256 * 1024 * 1024) >> PAGE_SHIFT;
rbo->placements[1].lpfn += (256 * 1024 * 1024) >> PAGE_SHIFT;
rbo->placement.num_placement++;
rbo->placement.num_busy_placement++;
}
void radeon_uvd_free_handles(struct radeon_device *rdev, struct drm_file *filp)
@ -356,6 +381,7 @@ static int radeon_uvd_cs_msg(struct radeon_cs_parser *p, struct radeon_bo *bo,
{
int32_t *msg, msg_type, handle;
unsigned img_size = 0;
struct fence *f;
void *ptr;
int i, r;
@ -365,8 +391,9 @@ static int radeon_uvd_cs_msg(struct radeon_cs_parser *p, struct radeon_bo *bo,
return -EINVAL;
}
f = reservation_object_get_excl(bo->tbo.resv);
if (f) {
r = radeon_fence_wait((struct radeon_fence *)f, false);
if (r) {
DRM_ERROR("Failed waiting for UVD message (%d)!\n", r);
return r;
@ -441,12 +468,12 @@ static int radeon_uvd_cs_reloc(struct radeon_cs_parser *p,
unsigned buf_sizes[], bool *has_msg_cmd)
{
struct radeon_cs_chunk *relocs_chunk;
struct radeon_bo_list *reloc;
unsigned idx, cmd, offset;
uint64_t start, end;
int r;
relocs_chunk = p->chunk_relocs;
offset = radeon_get_ib_value(p, data0);
idx = radeon_get_ib_value(p, data1);
if (idx >= relocs_chunk->length_dw) {
@ -455,7 +482,7 @@ static int radeon_uvd_cs_reloc(struct radeon_cs_parser *p,
return -EINVAL;
}
reloc = &p->relocs[(idx / 4)];
start = reloc->gpu_offset;
end = start + radeon_bo_size(reloc->robj);
start += offset;
@ -563,13 +590,13 @@ int radeon_uvd_cs_parse(struct radeon_cs_parser *p)
[0x00000003] = 2048,
};
if (p->chunk_ib->length_dw % 16) {
DRM_ERROR("UVD IB length (%d) not 16 dwords aligned!\n",
p->chunk_ib->length_dw);
return -EINVAL;
}
if (p->chunk_relocs == NULL) {
DRM_ERROR("No relocation chunk !\n");
return -EINVAL;
}
@ -593,7 +620,7 @@ int radeon_uvd_cs_parse(struct radeon_cs_parser *p)
DRM_ERROR("Unknown packet type %d !\n", pkt.type);
return -EINVAL;
}
} while (p->idx < p->chunk_ib->length_dw);
if (!has_msg_cmd) {
DRM_ERROR("UVD-IBs need a msg command!\n");
@ -604,38 +631,16 @@ int radeon_uvd_cs_parse(struct radeon_cs_parser *p)
}
static int radeon_uvd_send_msg(struct radeon_device *rdev,
int ring, uint64_t addr,
struct radeon_fence **fence)
{
struct radeon_ib ib;
int i, r;
r = radeon_ib_get(rdev, ring, &ib, NULL, 64);
if (r)
return r;
ib.ptr[0] = PACKET0(UVD_GPCOM_VCPU_DATA0, 0);
ib.ptr[1] = addr;
ib.ptr[2] = PACKET0(UVD_GPCOM_VCPU_DATA1, 0);
@ -647,19 +652,11 @@ static int radeon_uvd_send_msg(struct radeon_device *rdev,
ib.length_dw = 16;
r = radeon_ib_schedule(rdev, &ib, NULL, false);
if (fence)
*fence = radeon_fence_ref(ib.fence);
radeon_ib_free(rdev, &ib);
return r;
}
@ -669,28 +666,19 @@ err:
int radeon_uvd_get_create_msg(struct radeon_device *rdev, int ring,
uint32_t handle, struct radeon_fence **fence)
{
/* we use the last page of the vcpu bo for the UVD message */
uint64_t offs = radeon_bo_size(rdev->uvd.vcpu_bo) -
RADEON_GPU_PAGE_SIZE;
uint32_t *msg = rdev->uvd.cpu_addr + offs;
uint64_t addr = rdev->uvd.gpu_addr + offs;
int r, i;
r = radeon_bo_reserve(rdev->uvd.vcpu_bo, true);
if (r)
return r;
/* stitch together an UVD create msg */
msg[0] = cpu_to_le32(0x00000de4);
msg[1] = cpu_to_le32(0x00000000);
@ -706,37 +694,27 @@ int radeon_uvd_get_create_msg(struct radeon_device *rdev, int ring,
for (i = 11; i < 1024; ++i)
msg[i] = cpu_to_le32(0x0);
r = radeon_uvd_send_msg(rdev, ring, addr, fence);
radeon_bo_unreserve(rdev->uvd.vcpu_bo);
return r;
}
int radeon_uvd_get_destroy_msg(struct radeon_device *rdev, int ring,
uint32_t handle, struct radeon_fence **fence)
{
/* we use the last page of the vcpu bo for the UVD message */
uint64_t offs = radeon_bo_size(rdev->uvd.vcpu_bo) -
RADEON_GPU_PAGE_SIZE;
uint32_t *msg = rdev->uvd.cpu_addr + offs;
uint64_t addr = rdev->uvd.gpu_addr + offs;
int r, i;
r = radeon_bo_reserve(rdev->uvd.vcpu_bo, true);
if (r)
return r;
/* stitch together an UVD destroy msg */
msg[0] = cpu_to_le32(0x00000de4);
msg[1] = cpu_to_le32(0x00000002);
@ -745,10 +723,9 @@ int radeon_uvd_get_destroy_msg(struct radeon_device *rdev, int ring,
for (i = 4; i < 1024; ++i)
msg[i] = cpu_to_le32(0x0);
r = radeon_uvd_send_msg(rdev, ring, addr, fence);
radeon_bo_unreserve(rdev->uvd.vcpu_bo);
return r;
}
/**

View File

@ -126,7 +126,8 @@ int radeon_vce_init(struct radeon_device *rdev)
size = RADEON_GPU_PAGE_ALIGN(rdev->vce_fw->size) +
RADEON_VCE_STACK_SIZE + RADEON_VCE_HEAP_SIZE;
r = radeon_bo_create(rdev, size, PAGE_SIZE, true,
RADEON_GEM_DOMAIN_VRAM, 0, NULL, NULL,
&rdev->vce.vcpu_bo);
if (r) {
dev_err(rdev->dev, "(%d) failed to allocate VCE bo\n", r);
return r;
@ -452,11 +453,11 @@ int radeon_vce_cs_reloc(struct radeon_cs_parser *p, int lo, int hi,
unsigned size)
{
struct radeon_cs_chunk *relocs_chunk;
struct radeon_bo_list *reloc;
uint64_t start, end, offset;
unsigned idx;
relocs_chunk = p->chunk_relocs;
offset = radeon_get_ib_value(p, lo);
idx = radeon_get_ib_value(p, hi);
@ -466,7 +467,7 @@ int radeon_vce_cs_reloc(struct radeon_cs_parser *p, int lo, int hi,
return -EINVAL;
}
reloc = &p->relocs[(idx / 4)];
start = reloc->gpu_offset;
end = start + radeon_bo_size(reloc->robj);
start += offset;
@ -533,7 +534,7 @@ int radeon_vce_cs_parse(struct radeon_cs_parser *p)
uint32_t *size = &tmp;
int i, r;
while (p->idx < p->chunk_ib->length_dw) {
uint32_t len = radeon_get_ib_value(p, p->idx);
uint32_t cmd = radeon_get_ib_value(p, p->idx + 1);

View File

@ -125,39 +125,37 @@ void radeon_vm_manager_fini(struct radeon_device *rdev)
* Add the page directory to the list of BOs to
* validate for command submission (cayman+).
*/
struct radeon_bo_list *radeon_vm_get_bos(struct radeon_device *rdev,
struct radeon_vm *vm,
struct list_head *head)
{
struct radeon_bo_list *list;
unsigned i, idx;
list = kmalloc_array(vm->max_pde_used + 2,
sizeof(struct radeon_bo_list), GFP_KERNEL);
if (!list)
return NULL;
/* add the vm page table to the list */
list[0].robj = vm->page_directory;
list[0].prefered_domains = RADEON_GEM_DOMAIN_VRAM;
list[0].allowed_domains = RADEON_GEM_DOMAIN_VRAM;
list[0].tv.bo = &vm->page_directory->tbo;
list[0].tv.shared = true;
list[0].tiling_flags = 0;
list_add(&list[0].tv.head, head);
for (i = 0, idx = 1; i <= vm->max_pde_used; i++) {
if (!vm->page_tables[i].bo)
continue;
list[idx].robj = vm->page_tables[i].bo;
list[idx].prefered_domains = RADEON_GEM_DOMAIN_VRAM;
list[idx].allowed_domains = RADEON_GEM_DOMAIN_VRAM;
list[idx].tv.bo = &list[idx].robj->tbo;
list[idx].tv.shared = true;
list[idx].tiling_flags = 0;
list_add(&list[idx++].tv.head, head);
}
@ -180,15 +178,18 @@ struct radeon_fence *radeon_vm_grab_id(struct radeon_device *rdev,
struct radeon_vm *vm, int ring)
{
struct radeon_fence *best[RADEON_NUM_RINGS] = {};
struct radeon_vm_id *vm_id = &vm->ids[ring];
unsigned choices[2] = {};
unsigned i;
/* check if the id is still valid */
if (vm_id->id && vm_id->last_id_use &&
vm_id->last_id_use == rdev->vm_manager.active[vm_id->id])
return NULL;
/* we definitely need to flush */
vm_id->pd_gpu_addr = ~0ll;
/* skip over VMID 0, since it is the system VM */
for (i = 1; i < rdev->vm_manager.nvm; ++i) {
@ -196,8 +197,8 @@ struct radeon_fence *radeon_vm_grab_id(struct radeon_device *rdev,
if (fence == NULL) {
/* found a free one */
vm_id->id = i;
trace_radeon_vm_grab_id(i, ring);
return NULL;
}
@ -209,8 +210,8 @@ struct radeon_fence *radeon_vm_grab_id(struct radeon_device *rdev,
for (i = 0; i < 2; ++i) {
if (choices[i]) {
vm_id->id = choices[i];
trace_radeon_vm_grab_id(choices[i], ring);
return rdev->vm_manager.active[choices[i]];
}
}
@ -226,6 +227,7 @@ struct radeon_fence *radeon_vm_grab_id(struct radeon_device *rdev,
* @rdev: radeon_device pointer
* @vm: vm we want to flush
* @ring: ring to use for flush
* @updates: last vm update that is waited for
*
* Flush the vm (cayman+).
*
@ -233,15 +235,21 @@ struct radeon_fence *radeon_vm_grab_id(struct radeon_device *rdev,
*/
void radeon_vm_flush(struct radeon_device *rdev,
struct radeon_vm *vm,
int ring, struct radeon_fence *updates)
{
uint64_t pd_addr = radeon_bo_gpu_offset(vm->page_directory);
struct radeon_vm_id *vm_id = &vm->ids[ring];
if (pd_addr != vm_id->pd_gpu_addr || !vm_id->flushed_updates ||
radeon_fence_is_earlier(vm_id->flushed_updates, updates)) {
trace_radeon_vm_flush(pd_addr, ring, vm->ids[ring].id);
radeon_fence_unref(&vm_id->flushed_updates);
vm_id->flushed_updates = radeon_fence_ref(updates);
vm_id->pd_gpu_addr = pd_addr;
radeon_ring_vm_flush(rdev, &rdev->ring[ring],
vm_id->id, vm_id->pd_gpu_addr);
}
}
@ -261,18 +269,13 @@ void radeon_vm_fence(struct radeon_device *rdev,
struct radeon_vm *vm,
struct radeon_fence *fence)
{
unsigned vm_id = vm->ids[fence->ring].id;
radeon_fence_unref(&rdev->vm_manager.active[vm_id]);
rdev->vm_manager.active[vm_id] = radeon_fence_ref(fence);
radeon_fence_unref(&vm->ids[fence->ring].last_id_use);
vm->ids[fence->ring].last_id_use = radeon_fence_ref(fence);
}
/**
@ -385,34 +388,25 @@ static void radeon_vm_set_pages(struct radeon_device *rdev,
static int radeon_vm_clear_bo(struct radeon_device *rdev,
struct radeon_bo *bo)
{
struct radeon_ib ib;
unsigned entries;
uint64_t addr;
int r;
r = radeon_bo_reserve(bo, false);
if (r)
return r;
r = ttm_bo_validate(&bo->tbo, &bo->placement, true, false);
if (r)
goto error_unreserve;
addr = radeon_bo_gpu_offset(bo);
entries = radeon_bo_size(bo) / 8;
r = radeon_ib_get(rdev, R600_RING_TYPE_DMA_INDEX, &ib, NULL, 256);
if (r)
goto error_unreserve;
ib.length_dw = 0;
@ -422,15 +416,16 @@ static int radeon_vm_clear_bo(struct radeon_device *rdev,
r = radeon_ib_schedule(rdev, &ib, NULL, false);
if (r)
goto error_free;
ib.fence->is_vm_update = true;
radeon_bo_fence(bo, ib.fence, false);
error_free:
radeon_ib_free(rdev, &ib);
error_unreserve:
radeon_bo_unreserve(bo);
return r;
}
@ -446,7 +441,7 @@ error:
* Validate and set the offset requested within the vm address space.
* Returns 0 for success, error for failure.
*
* Object has to be reserved and gets unreserved by this function!
*/
int radeon_vm_bo_set_addr(struct radeon_device *rdev,
struct radeon_bo_va *bo_va,
@ -492,7 +487,9 @@ int radeon_vm_bo_set_addr(struct radeon_device *rdev,
tmp->vm = vm;
tmp->addr = bo_va->addr;
tmp->bo = radeon_bo_ref(bo_va->bo);
spin_lock(&vm->status_lock);
list_add(&tmp->vm_status, &vm->freed);
spin_unlock(&vm->status_lock);
}
interval_tree_remove(&bo_va->it, &vm->va);
@ -545,7 +542,8 @@ int radeon_vm_bo_set_addr(struct radeon_device *rdev,
r = radeon_bo_create(rdev, RADEON_VM_PTE_COUNT * 8,
RADEON_GPU_PAGE_SIZE, true,
RADEON_GEM_DOMAIN_VRAM, 0,
NULL, NULL, &pt);
if (r)
return r;
@ -571,7 +569,7 @@ int radeon_vm_bo_set_addr(struct radeon_device *rdev,
}
mutex_unlock(&vm->mutex);
return 0;
}
/**
@ -694,17 +692,16 @@ int radeon_vm_update_page_directory(struct radeon_device *rdev,
if (ib.length_dw != 0) {
radeon_asic_vm_pad_ib(rdev, &ib);
radeon_sync_resv(rdev, &ib.sync, pd->tbo.resv, true);
WARN_ON(ib.length_dw > ndw);
r = radeon_ib_schedule(rdev, &ib, NULL, false);
if (r) {
radeon_ib_free(rdev, &ib);
return r;
}
radeon_fence_unref(&vm->fence);
vm->fence = radeon_fence_ref(ib.fence);
radeon_fence_unref(&vm->last_flush);
ib.fence->is_vm_update = true;
radeon_bo_fence(pd, ib.fence, false);
}
radeon_ib_free(rdev, &ib);
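The page-directory update now pulls its dependencies from the buffer's reservation object (radeon_sync_resv) instead of a single sync_obj fence, and publishes its own fence back with radeon_bo_fence. A toy model of that gather-run-publish cycle (toy types, not the TTM API):

#include <stdio.h>

struct fence { int seqno; };
struct resv  { struct fence *excl; };

static void sync_resv(struct resv *r)       /* ~ radeon_sync_resv()  */
{
	if (r->excl)
		printf("wait for fence %d\n", r->excl->seqno);
}

static void publish(struct resv *r, struct fence *f)  /* ~ radeon_bo_fence() */
{
	r->excl = f;
}

int main(void)
{
	struct resv pd = { 0 };
	struct fence job1 = { 1 }, job2 = { 2 };

	sync_resv(&pd);            /* nothing to wait for yet      */
	publish(&pd, &job1);       /* PD update 1 signals via job1 */
	sync_resv(&pd);            /* PD update 2 waits for job1   */
	publish(&pd, &job2);
	return 0;
}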
@@ -803,7 +800,7 @@ static void radeon_vm_frag_ptes(struct radeon_device *rdev,
*
* Global and local mutex must be locked!
*/
static void radeon_vm_update_ptes(struct radeon_device *rdev,
static int radeon_vm_update_ptes(struct radeon_device *rdev,
struct radeon_vm *vm,
struct radeon_ib *ib,
uint64_t start, uint64_t end,
@@ -820,8 +817,12 @@ static void radeon_vm_update_ptes(struct radeon_device *rdev,
struct radeon_bo *pt = vm->page_tables[pt_idx].bo;
unsigned nptes;
uint64_t pte;
int r;
radeon_semaphore_sync_to(ib->semaphore, pt->tbo.sync_obj);
radeon_sync_resv(rdev, &ib->sync, pt->tbo.resv, true);
r = reservation_object_reserve_shared(pt->tbo.resv);
if (r)
return r;
if ((addr & ~mask) == (end & ~mask))
nptes = end - addr;
@@ -855,6 +856,33 @@ static void radeon_vm_update_ptes(struct radeon_device *rdev,
last_pte + 8 * count,
last_dst, flags);
}
return 0;
}
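radeon_vm_update_ptes changed from void to int because reserving a slot for a shared fence allocates memory and can fail; that error now propagates to the submitter instead of being ignored. A toy model of the failure path (hypothetical names):

#include <errno.h>
#include <stdio.h>

struct toy_resv { int shared_max, shared_count; };

static int reserve_shared_slot(struct toy_resv *r)
{
	if (r->shared_count == r->shared_max) {
		/* the real call grows a fence array and can hit -ENOMEM */
		if (r->shared_max >= 1024)
			return -ENOMEM;
		r->shared_max = r->shared_max ? r->shared_max * 2 : 4;
	}
	r->shared_count++;
	return 0;
}

static int update_ptes(struct toy_resv *pts, int npt)
{
	for (int i = 0; i < npt; ++i) {
		int r = reserve_shared_slot(&pts[i]);
		if (r)
			return r;          /* propagate; don't touch the tables */
	}
	return 0;
}

int main(void)
{
	struct toy_resv pts[4] = { { 0, 0 } };
	printf("update_ptes -> %d\n", update_ptes(pts, 4));
	return 0;
}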
/**
* radeon_vm_fence_pts - fence page tables after an update
*
* @vm: requested vm
* @start: start of GPU address range
* @end: end of GPU address range
* @fence: fence to use
*
* Fence the page tables in the range @start - @end (cayman+).
*
* Global and local mutex must be locked!
*/
static void radeon_vm_fence_pts(struct radeon_vm *vm,
uint64_t start, uint64_t end,
struct radeon_fence *fence)
{
unsigned i;
start >>= radeon_vm_block_size;
end >>= radeon_vm_block_size;
for (i = start; i <= end; ++i)
radeon_bo_fence(vm->page_tables[i].bo, fence, true);
}
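The shifts convert page numbers into page-table indices: each table covers (1 << radeon_vm_block_size) pages, and every table intersecting the range gets the fence as shared. A worked example, assuming a block size of 9 purely for illustration:

#include <stdio.h>

int main(void)
{
	unsigned block_size = 9;                       /* 512 pages per table */
	unsigned long long start = 1000, end = 2000;   /* page numbers        */

	start >>= block_size;
	end   >>= block_size;
	for (unsigned long long i = start; i <= end; ++i)
		printf("fence page table %llu as shared\n", i);
	/* with these values: page tables 1, 2 and 3 */
	return 0;
}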
/**
@@ -887,11 +915,16 @@ int radeon_vm_bo_update(struct radeon_device *rdev,
return -EINVAL;
}
spin_lock(&vm->status_lock);
list_del_init(&bo_va->vm_status);
spin_unlock(&vm->status_lock);
bo_va->flags &= ~RADEON_VM_PAGE_VALID;
bo_va->flags &= ~RADEON_VM_PAGE_SYSTEM;
bo_va->flags &= ~RADEON_VM_PAGE_SNOOPED;
// if (bo_va->bo && radeon_ttm_tt_is_readonly(bo_va->bo->tbo.ttm))
// bo_va->flags &= ~RADEON_VM_PAGE_WRITEABLE;
if (mem) {
addr = mem->start << PAGE_SHIFT;
if (mem->mem_type != TTM_PL_SYSTEM) {
@@ -953,23 +986,34 @@ int radeon_vm_bo_update(struct radeon_device *rdev,
return r;
ib.length_dw = 0;
radeon_vm_update_ptes(rdev, vm, &ib, bo_va->it.start,
if (!(bo_va->flags & RADEON_VM_PAGE_VALID)) {
unsigned i;
for (i = 0; i < RADEON_NUM_RINGS; ++i)
radeon_sync_fence(&ib.sync, vm->ids[i].last_id_use);
}
r = radeon_vm_update_ptes(rdev, vm, &ib, bo_va->it.start,
bo_va->it.last + 1, addr,
radeon_vm_page_flags(bo_va->flags));
if (r) {
radeon_ib_free(rdev, &ib);
return r;
}
radeon_asic_vm_pad_ib(rdev, &ib);
WARN_ON(ib.length_dw > ndw);
radeon_semaphore_sync_to(ib.semaphore, vm->fence);
r = radeon_ib_schedule(rdev, &ib, NULL, false);
if (r) {
radeon_ib_free(rdev, &ib);
return r;
}
radeon_fence_unref(&vm->fence);
vm->fence = radeon_fence_ref(ib.fence);
ib.fence->is_vm_update = true;
radeon_vm_fence_pts(vm, bo_va->it.start, bo_va->it.last + 1, ib.fence);
radeon_fence_unref(&bo_va->last_pt_update);
bo_va->last_pt_update = radeon_fence_ref(ib.fence);
radeon_ib_free(rdev, &ib);
radeon_fence_unref(&vm->last_flush);
return 0;
}
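Each mapping now remembers its own last page-table update in bo_va->last_pt_update rather than funnelling everything through one vm->fence. The unref-then-ref idiom used to retarget such a fence pointer, as a runnable toy (not the kernel's fence API):

#include <assert.h>

/* Toy refcount: the pointer always owns exactly one reference, so we
 * drop the old reference before taking the new one. */
struct toy_fence { int refs; };

static struct toy_fence *fence_ref(struct toy_fence *f)
{
	f->refs++;
	return f;
}

static void fence_unref(struct toy_fence **f)
{
	if (*f)
		(*f)->refs--;
	*f = NULL;
}

int main(void)
{
	struct toy_fence a = { 1 }, b = { 1 };
	struct toy_fence *last = fence_ref(&a);   /* last_pt_update = a */

	fence_unref(&last);                       /* retarget to b */
	last = fence_ref(&b);

	assert(a.refs == 1 && b.refs == 2);
	return 0;
}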
@@ -988,16 +1032,25 @@ int radeon_vm_bo_update(struct radeon_device *rdev,
int radeon_vm_clear_freed(struct radeon_device *rdev,
struct radeon_vm *vm)
{
struct radeon_bo_va *bo_va, *tmp;
struct radeon_bo_va *bo_va;
int r;
list_for_each_entry_safe(bo_va, tmp, &vm->freed, vm_status) {
spin_lock(&vm->status_lock);
while (!list_empty(&vm->freed)) {
bo_va = list_first_entry(&vm->freed,
struct radeon_bo_va, vm_status);
spin_unlock(&vm->status_lock);
r = radeon_vm_bo_update(rdev, bo_va, NULL);
radeon_bo_unref(&bo_va->bo);
radeon_fence_unref(&bo_va->last_pt_update);
kfree(bo_va);
if (r)
return r;
spin_lock(&vm->status_lock);
}
spin_unlock(&vm->status_lock);
return 0;
}
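The rewritten loop pops one entry at a time and drops status_lock around the update, because radeon_vm_bo_update() can sleep and status_lock is a spinlock. The same shape with a pthread mutex standing in for the spinlock (illustrative only; the real code removes the entry inside the update itself):

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct node { struct node *next; int payload; };

static pthread_mutex_t status_lock = PTHREAD_MUTEX_INITIALIZER;
static struct node *freed;        /* singly linked "freed" list */

static void expensive_update(int payload)   /* may sleep in the real code */
{
	printf("updating mapping %d\n", payload);
}

static void clear_freed(void)
{
	pthread_mutex_lock(&status_lock);
	while (freed) {
		struct node *n = freed;
		freed = n->next;
		pthread_mutex_unlock(&status_lock);  /* never sleep under the lock */

		expensive_update(n->payload);
		free(n);

		pthread_mutex_lock(&status_lock);    /* retake before re-checking */
	}
	pthread_mutex_unlock(&status_lock);
}

int main(void)
{
	for (int i = 0; i < 3; ++i) {
		struct node *n = malloc(sizeof(*n));
		n->payload = i;
		n->next = freed;
		freed = n;
	}
	clear_freed();
	return 0;
}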
@@ -1016,14 +1069,23 @@ int radeon_vm_clear_freed(struct radeon_device *rdev,
int radeon_vm_clear_invalids(struct radeon_device *rdev,
struct radeon_vm *vm)
{
struct radeon_bo_va *bo_va, *tmp;
struct radeon_bo_va *bo_va;
int r;
list_for_each_entry_safe(bo_va, tmp, &vm->invalidated, vm_status) {
spin_lock(&vm->status_lock);
while (!list_empty(&vm->invalidated)) {
bo_va = list_first_entry(&vm->invalidated,
struct radeon_bo_va, vm_status);
spin_unlock(&vm->status_lock);
r = radeon_vm_bo_update(rdev, bo_va, NULL);
if (r)
return r;
spin_lock(&vm->status_lock);
}
spin_unlock(&vm->status_lock);
return 0;
}
@@ -1046,14 +1108,17 @@ void radeon_vm_bo_rmv(struct radeon_device *rdev,
mutex_lock(&vm->mutex);
interval_tree_remove(&bo_va->it, &vm->va);
spin_lock(&vm->status_lock);
list_del(&bo_va->vm_status);
if (bo_va->addr) {
bo_va->bo = radeon_bo_ref(bo_va->bo);
list_add(&bo_va->vm_status, &vm->freed);
} else {
radeon_fence_unref(&bo_va->last_pt_update);
kfree(bo_va);
}
spin_unlock(&vm->status_lock);
mutex_unlock(&vm->mutex);
}
@@ -1074,10 +1139,10 @@ void radeon_vm_bo_invalidate(struct radeon_device *rdev,
list_for_each_entry(bo_va, &bo->va, bo_list) {
if (bo_va->addr) {
mutex_lock(&bo_va->vm->mutex);
spin_lock(&bo_va->vm->status_lock);
list_del(&bo_va->vm_status);
list_add(&bo_va->vm_status, &bo_va->vm->invalidated);
mutex_unlock(&bo_va->vm->mutex);
spin_unlock(&bo_va->vm->status_lock);
}
}
}
@@ -1095,15 +1160,17 @@ int radeon_vm_init(struct radeon_device *rdev, struct radeon_vm *vm)
const unsigned align = min(RADEON_VM_PTB_ALIGN_SIZE,
RADEON_VM_PTE_COUNT * 8);
unsigned pd_size, pd_entries, pts_size;
int r;
int i, r;
vm->id = 0;
vm->ib_bo_va = NULL;
vm->fence = NULL;
vm->last_flush = NULL;
vm->last_id_use = NULL;
for (i = 0; i < RADEON_NUM_RINGS; ++i) {
vm->ids[i].id = 0;
vm->ids[i].flushed_updates = NULL;
vm->ids[i].last_id_use = NULL;
}
mutex_init(&vm->mutex);
vm->va = RB_ROOT;
spin_lock_init(&vm->status_lock);
INIT_LIST_HEAD(&vm->invalidated);
INIT_LIST_HEAD(&vm->freed);
@@ -1120,7 +1187,7 @@ int radeon_vm_init(struct radeon_device *rdev, struct radeon_vm *vm)
r = radeon_bo_create(rdev, pd_size, align, true,
RADEON_GEM_DOMAIN_VRAM, 0, NULL,
&vm->page_directory);
NULL, &vm->page_directory);
if (r)
return r;
@@ -1157,11 +1224,13 @@ void radeon_vm_fini(struct radeon_device *rdev, struct radeon_vm *vm)
if (!r) {
list_del_init(&bo_va->bo_list);
radeon_bo_unreserve(bo_va->bo);
radeon_fence_unref(&bo_va->last_pt_update);
kfree(bo_va);
}
}
list_for_each_entry_safe(bo_va, tmp, &vm->freed, vm_status) {
radeon_bo_unref(&bo_va->bo);
radeon_fence_unref(&bo_va->last_pt_update);
kfree(bo_va);
}
@@ -1171,9 +1240,10 @@ void radeon_vm_fini(struct radeon_device *rdev, struct radeon_vm *vm)
radeon_bo_unref(&vm->page_directory);
radeon_fence_unref(&vm->fence);
radeon_fence_unref(&vm->last_flush);
radeon_fence_unref(&vm->last_id_use);
for (i = 0; i < RADEON_NUM_RINGS; ++i) {
radeon_fence_unref(&vm->ids[i].flushed_updates);
radeon_fence_unref(&vm->ids[i].last_id_use);
}
mutex_destroy(&vm->mutex);
}

View File

@@ -1,8 +1,6 @@
#include <drm/drmP.h>
#include <drm/radeon_drm.h>
#include <drm.h>
#include <drm_mm.h>
#include "radeon.h"
#include "radeon_object.h"
#include "bitmap.h"
@@ -34,7 +32,7 @@ int init_cursor(cursor_t *cursor)
rdev = (struct radeon_device *)os_display->ddev->dev_private;
r = radeon_bo_create(rdev, CURSOR_WIDTH*CURSOR_HEIGHT*4,
PAGE_SIZE, false, RADEON_GEM_DOMAIN_VRAM, 0, NULL, &cursor->robj);
4096, false, RADEON_GEM_DOMAIN_VRAM, 0, NULL, NULL, &cursor->robj);
if (unlikely(r != 0))
return r;
@@ -227,9 +225,9 @@ bool init_display(struct radeon_device *rdev, videomode_t *usermode)
{
struct drm_device *dev;
cursor_t *cursor;
bool retval = true;
u32_t ifl;
cursor_t *cursor;
bool retval = true;
u32 ifl;
ENTER();

View File

@@ -1,8 +1,6 @@
#include <drm/drmP.h>
#include <drm/radeon_drm.h>
#include <drm.h>
#include <drm_mm.h>
#include "radeon.h"
#include "radeon_object.h"
#include "drm_fb_helper.h"
@@ -406,8 +404,8 @@ int init_display_kms(struct drm_device *dev, videomode_t *usermode)
struct drm_crtc *crtc = NULL;
struct drm_framebuffer *fb;
cursor_t *cursor;
u32_t ifl;
cursor_t *cursor;
u32 ifl;
int ret;
mutex_lock(&dev->mode_config.mutex);

View File

@@ -840,6 +840,9 @@ void rs600_bandwidth_update(struct radeon_device *rdev)
u32 d1mode_priority_a_cnt, d2mode_priority_a_cnt;
/* FIXME: implement full support */
if (!rdev->mode_info.mode_config_initialized)
return;
radeon_update_display_priority(rdev);
if (rdev->mode_info.crtcs[0]->base.enabled)
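The same guard is added to each of the bandwidth handlers in this commit: bail out before mode config exists, since the code that follows dereferences mode_info.crtcs. Its shape as a runnable toy (hypothetical names):

#include <stdbool.h>
#include <stdio.h>

struct dev_state {
	bool mode_config_initialized;
	void *crtcs[2];
};

static void bandwidth_update(struct dev_state *s)
{
	if (!s->mode_config_initialized)
		return;                    /* crtcs[] not set up yet */
	printf("crtc0 = %p\n", s->crtcs[0]);
}

int main(void)
{
	struct dev_state s = { 0 };
	bandwidth_update(&s);          /* early init: safely does nothing */
	return 0;
}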

View File

@@ -579,6 +579,9 @@ void rs690_bandwidth_update(struct radeon_device *rdev)
u32 d1mode_priority_a_cnt, d1mode_priority_b_cnt;
u32 d2mode_priority_a_cnt, d2mode_priority_b_cnt;
if (!rdev->mode_info.mode_config_initialized)
return;
radeon_update_display_priority(rdev);
if (rdev->mode_info.crtcs[0]->base.enabled)

View File

@@ -24,6 +24,7 @@
#include "drmP.h"
#include "radeon.h"
#include "radeon_asic.h"
#include "rs780d.h"
#include "r600_dpm.h"
#include "rs780_dpm.h"

View File

@@ -1214,6 +1214,9 @@ void rv515_bandwidth_update(struct radeon_device *rdev)
struct drm_display_mode *mode0 = NULL;
struct drm_display_mode *mode1 = NULL;
if (!rdev->mode_info.mode_config_initialized)
return;
radeon_update_display_priority(rdev);
if (rdev->mode_info.crtcs[0]->base.enabled)

View File

@@ -24,6 +24,7 @@
#include "drmP.h"
#include "radeon.h"
#include "radeon_asic.h"
#include "rv6xxd.h"
#include "r600_dpm.h"
#include "rv6xx_dpm.h"

Some files were not shown because too many files have changed in this diff.