diff --git a/drivers/video/drm/i915/Gtt/intel-gtt.c b/drivers/video/drm/i915/Gtt/intel-gtt.c index 42e510402..791ee7eae 100644 --- a/drivers/video/drm/i915/Gtt/intel-gtt.c +++ b/drivers/video/drm/i915/Gtt/intel-gtt.c @@ -600,6 +600,7 @@ void intel_gtt_insert_pages(unsigned int first_entry, unsigned int num_entries, } readl(intel_private.gtt+j-1); } +EXPORT_SYMBOL(intel_gtt_insert_pages); void intel_gtt_clear_range(unsigned int first_entry, unsigned int num_entries) diff --git a/drivers/video/drm/i915/bitmap.c b/drivers/video/drm/i915/bitmap.c index 5b13259c0..96a59dd2f 100644 --- a/drivers/video/drm/i915/bitmap.c +++ b/drivers/video/drm/i915/bitmap.c @@ -8,11 +8,11 @@ void __attribute__((regparm(1))) destroy_bitmap(bitmap_t *bitmap) { -/* - * - * - * -*/ + printf("destroy bitmap %d\n", bitmap->handle); + free_handle(&bm_man, bitmap->handle); + bitmap->handle = 0; + i915_gem_object_unpin(bitmap->obj); + drm_gem_object_unreference(&bitmap->obj->base); __DestroyObject(bitmap); }; @@ -30,29 +30,47 @@ int init_bitmaps() }; -int create_bitmap(struct ubitmap *pbitmap) +int create_surface(struct io_call_10 *pbitmap) { struct drm_i915_gem_object *obj; bitmap_t *bitmap; u32 handle; - u32 width; - u32 height; - u32 size; - u32 pitch; + u32 width, max_width; + u32 height, max_height; + u32 size, max_size; + u32 pitch, max_pitch; void *uaddr; int ret; pbitmap->handle = 0; - pbitmap->data = NULL; + pbitmap->data = (void*)-1; - width = pbitmap->width; + width = pbitmap->width; height = pbitmap->height; +/* if((width==0)||(height==0)||(width>4096)||(height>4096)) goto err1; + if( ((pbitmap->max_width !=0 ) && + (pbitmap->max_width < width)) || + (pbitmap->max_width > 4096) ) + goto err1; + + if( ((pbitmap->max_height !=0 ) && + (pbitmap->max_height < width)) || + (pbitmap->max_height > 4096) ) + goto err1; + + if( pbitmap->format != 0) + goto err1; +*/ + + max_width = (pbitmap->max_width ==0) ? width : pbitmap->max_width; + max_height = (pbitmap->max_height==0) ? 
height : pbitmap->max_height; + handle = alloc_handle(&bm_man); // printf("%s %d\n",__FUNCTION__, handle); @@ -73,6 +91,7 @@ int create_bitmap(struct ubitmap *pbitmap) pitch = ALIGN(width*4,64); size = roundup(pitch*height, PAGE_SIZE); + // printf("pitch %d size %d\n", pitch, size); obj = i915_gem_alloc_object(main_device, size); @@ -83,48 +102,59 @@ int create_bitmap(struct ubitmap *pbitmap) if (ret) goto err3; - uaddr = UserAlloc(size); + max_pitch = ALIGN(max_width*4,64); + max_size = roundup(max_pitch*max_height, PAGE_SIZE); + + uaddr = UserAlloc(max_size); if( uaddr == NULL) goto err4; else { u32_t *src, *dst; - int count; + u32 count, max_count; #define page_tabs 0xFDC00000 /* really dirty hack */ src = (u32_t*)obj->pages; dst = &((u32_t*)page_tabs)[(u32_t)uaddr >> 12]; count = size/4096; + max_count = max_size/4096 - count; while(count--) { *dst++ = (0xFFFFF000 & *src++) | 0x207 ; // map as shared page }; +// while(max_count--) +// *dst++ = 0; // cleanup unused space } bitmap->handle = handle; - bitmap->width = width; - bitmap->height = height; + bitmap->uaddr = uaddr; bitmap->pitch = pitch; bitmap->gaddr = obj->gtt_offset; - bitmap->uaddr = uaddr; + + bitmap->width = width; + bitmap->height = height; + bitmap->max_width = max_width; + bitmap->max_height = max_height; + bitmap->obj = obj; bitmap->header.destroy = destroy_bitmap; - pbitmap->pitch = pitch; pbitmap->handle = handle; pbitmap->data = uaddr; + pbitmap->pitch = pitch; -// printf("%s handle %d pitch %d gpu %x user %x\n", -// __FUNCTION__, handle, pitch, obj->gtt_offset, uaddr); + + printf("%s handle: %d pitch: %d gpu_addr: %x user_addr: %x\n", + __FUNCTION__, handle, pitch, obj->gtt_offset, uaddr); return 0; err4: -// drm_gem_object_unpin; + i915_gem_object_unpin(obj); err3: -// drm_gem_object_unreference(&obj->base); + drm_gem_object_unreference(&obj->base); err2: free_handle(&bm_man, handle); __DestroyObject(bitmap); @@ -134,6 +164,8 @@ err1: }; + + int init_hman(struct hman *man, u32 count) { 
u32* data; diff --git a/drivers/video/drm/i915/bitmap.h b/drivers/video/drm/i915/bitmap.h index 31a07ee34..a6887abfc 100644 --- a/drivers/video/drm/i915/bitmap.h +++ b/drivers/video/drm/i915/bitmap.h @@ -36,25 +36,36 @@ typedef struct { kobj_t header; - u32 handle; - u32 width; - u32 height; - u32 pitch; - u32 gaddr; - void *uaddr; + u32 handle; + void *uaddr; + + u32 pitch; + u32 gaddr; + + u32 width; + u32 height; + u32 max_width; + u32 max_height; + + u32 format; struct drm_i915_gem_object *obj; }bitmap_t; -struct ubitmap +struct io_call_10 /* SRV_CREATE_SURFACE */ { - u32 width; - u32 height; - u32 pitch; - u32 handle; - void *data; + u32 handle; // ignored + void *data; // ignored + + u32 width; + u32 height; + u32 pitch; // ignored + + u32 max_width; + u32 max_height; + u32 format; // reserved mbz }; -int create_bitmap(struct ubitmap *pbitmap); +int create_surface(struct io_call_10 *pbitmap); int init_bitmaps(); diff --git a/drivers/video/drm/i915/clip.inc b/drivers/video/drm/i915/clip.inc new file mode 100644 index 000000000..607cab97d --- /dev/null +++ b/drivers/video/drm/i915/clip.inc @@ -0,0 +1,121 @@ + +#define CLIP_TOP 1 +#define CLIP_BOTTOM 2 +#define CLIP_RIGHT 4 +#define CLIP_LEFT 8 + +typedef struct +{ + int xmin; + int ymin; + int xmax; + int ymax; +}clip_t; + + +static int _L1OutCode( clip_t *clip, int x, int y ) +/*================================= + + Verify that a point is inside or outside the active viewport. */ +{ + int flag; + + flag = 0; + if( x < clip->xmin ) { + flag |= CLIP_LEFT; + } else if( x > clip->xmax ) { + flag |= CLIP_RIGHT; + } + if( y < clip->ymin ) { + flag |= CLIP_TOP; + } else if( y > clip->ymax ) { + flag |= CLIP_BOTTOM; + } + return( flag ); +}; + +static void block_inter( clip_t *clip, int *x, int *y, int flag ) +/*====================================================== + + Find the intersection of a block with a boundary of the viewport. 
*/ +{ + if( flag & CLIP_TOP ) { + *y = clip->ymin; + } else if( flag & CLIP_BOTTOM ) { + *y = clip->ymax; + } else if( flag & CLIP_RIGHT ) { + *x = clip->xmax; + } else if( flag & CLIP_LEFT ) { + *x = clip->xmin; + } +} + + +int BlockClip(clip_t *clip, int *x1, int *y1, int *x2, int* y2 ) +/*============================================================== + + Clip a block with opposite corners (x1,y1) and (x2,y2) to the + active viewport based on the Cohen-Sutherland algorithm for line + clipping. Return the clipped coordinates and a decision drawing + flag ( 0 draw : 1 don't draw ). */ +{ + int flag1; + int flag2; + + flag1 = _L1OutCode( clip, *x1, *y1 ); + flag2 = _L1OutCode( clip, *x2, *y2 ); + for( ;; ) { + if( flag1 & flag2 ) break; /* trivially outside */ + if( flag1 == flag2 ) break; /* completely inside */ + if( flag1 == 0 ) { + block_inter( clip, x2, y2, flag2 ); + flag2 = _L1OutCode( clip, *x2, *y2 ); + } else { + block_inter( clip, x1, y1, flag1 ); + flag1 = _L1OutCode( clip, *x1, *y1 ); + } + } + return( flag1 & flag2 ); +} + + +int blit_clip(clip_t *dst_clip,int *dst_x,int *dst_y, + clip_t *src_clip,int *src_x, int *src_y, + u32_t *w, u32_t *h) +{ + int sx0, sy0, sx1, sy1; + + sx0 = *src_x; + sy0 = *src_y; + + sx1 = sx0 + *w - 1; + sy1 = sy0 + *h - 1; + + + if( ! BlockClip( src_clip, &sx0, &sy0, &sx1, &sy1)) + { + int dx0, dy0, dx1, dy1; + + dx0 = *dst_x + sx0 - *src_x; + dy0 = *dst_y + sy0 - *src_y; + + dx1 = dx0 + sx1 - sx0; + dy1 = dy0 + sy1 - sy0; + + if( ! 
BlockClip( dst_clip, &dx0, &dy0, &dx1, &dy1)) + { + *w = dx1 - dx0 + 1; + *h = dy1 - dy0 + 1; + + *src_x += dx0 - *dst_x; + *src_y += dy0 - *dst_y; + + *dst_x = dx0; + *dst_y = dy0; + + return 0; + }; + } + return 1; +}; + diff --git a/drivers/video/drm/i915/i915_drv.h b/drivers/video/drm/i915/i915_drv.h index 9ddf9027c..32aedc4d8 100644 --- a/drivers/video/drm/i915/i915_drv.h +++ b/drivers/video/drm/i915/i915_drv.h @@ -756,6 +756,8 @@ enum i915_cache_level { struct drm_i915_gem_object { struct drm_gem_object base; + void *mapped; + /** Current space allocated to this object in the GTT, if any. */ struct drm_mm_node *gtt_space; struct list_head gtt_list; diff --git a/drivers/video/drm/i915/i915_gem.c b/drivers/video/drm/i915/i915_gem.c index 36e1b4463..fb25c8f32 100644 --- a/drivers/video/drm/i915/i915_gem.c +++ b/drivers/video/drm/i915/i915_gem.c @@ -36,6 +36,19 @@ //#include #include +extern int x86_clflush_size; + +#undef mb +#undef rmb +#undef wmb +#define mb() asm volatile("mfence") +#define rmb() asm volatile ("lfence") +#define wmb() asm volatile ("sfence") + +static inline void clflush(volatile void *__p) +{ + asm volatile("clflush %0" : "+m" (*(volatile char*)__p)); +} #define MAX_ERRNO 4095 @@ -56,6 +69,16 @@ static inline long PTR_ERR(const void *ptr) return (long) ptr; } +void +drm_gem_object_free(struct kref *kref) +{ + struct drm_gem_object *obj = (struct drm_gem_object *) kref; + struct drm_device *dev = obj->dev; + + BUG_ON(!mutex_is_locked(&dev->struct_mutex)); + + i915_gem_free_object(obj); +} /** * Initialize an already allocated GEM object of the specified size with @@ -67,13 +90,16 @@ int drm_gem_object_init(struct drm_device *dev, BUG_ON((size & (PAGE_SIZE - 1)) != 0); obj->dev = dev; + kref_init(&obj->refcount); atomic_set(&obj->handle_count, 0); obj->size = size; return 0; } - +void +drm_gem_object_release(struct drm_gem_object *obj) +{ } #define I915_EXEC_CONSTANTS_MASK (3<<6) @@ -264,7 +290,6 @@ i915_gem_create(struct drm_file *file, /* 
drop reference from allocate - handle holds it now */ drm_gem_object_unreference(&obj->base); -// trace_i915_gem_object_create(obj); *handle_p = handle; return 0; @@ -755,8 +780,8 @@ i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj, return 0; err_pages: -// while (i--) -// page_cache_release(obj->pages[i]); + while (i--) + FreePage(obj->pages[i]); free(obj->pages); obj->pages = NULL; @@ -769,6 +794,8 @@ i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj) int page_count = obj->base.size / PAGE_SIZE; int i; + ENTER(); + BUG_ON(obj->madv == __I915_MADV_PURGED); // if (obj->tiling_mode != I915_TILING_NONE) @@ -776,21 +803,16 @@ i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj) if (obj->madv == I915_MADV_DONTNEED) obj->dirty = 0; -/* It's a swap!!! + for (i = 0; i < page_count; i++) { - if (obj->dirty) - set_page_dirty(obj->pages[i]); - - if (obj->madv == I915_MADV_WILLNEED) - mark_page_accessed(obj->pages[i]); - - //page_cache_release(obj->pages[i]); + FreePage(obj->pages[i]); } obj->dirty = 0; -*/ free(obj->pages); obj->pages = NULL; + + LEAVE(); } void @@ -806,7 +828,7 @@ i915_gem_object_move_to_active(struct drm_i915_gem_object *obj, /* Add a reference if we're newly entering the active list. 
*/ if (!obj->active) { -// drm_gem_object_reference(&obj->base); + drm_gem_object_reference(&obj->base); obj->active = 1; } @@ -828,9 +850,50 @@ i915_gem_object_move_to_active(struct drm_i915_gem_object *obj, } } +static void +i915_gem_object_move_off_active(struct drm_i915_gem_object *obj) +{ + list_del_init(&obj->ring_list); + obj->last_rendering_seqno = 0; +} + +static void +i915_gem_object_move_to_flushing(struct drm_i915_gem_object *obj) +{ + struct drm_device *dev = obj->base.dev; + drm_i915_private_t *dev_priv = dev->dev_private; + + BUG_ON(!obj->active); + list_move_tail(&obj->mm_list, &dev_priv->mm.flushing_list); + + i915_gem_object_move_off_active(obj); +} + + +/* Immediately discard the backing storage */ +static void +i915_gem_object_truncate(struct drm_i915_gem_object *obj) +{ + struct inode *inode; + + /* Our goal here is to return as much of the memory as + * is possible back to the system as we are called from OOM. + * To do this we must instruct the shmfs to drop all of its + * backing pages, *now*. + */ + + obj->madv = __I915_MADV_PURGED; +} + +static inline int +i915_gem_object_is_purgeable(struct drm_i915_gem_object *obj) +{ + return obj->madv == I915_MADV_DONTNEED; +} + static void i915_gem_process_flushing_list(struct intel_ring_buffer *ring, uint32_t flush_domains) @@ -848,9 +911,6 @@ i915_gem_process_flushing_list(struct intel_ring_buffer *ring, i915_gem_object_move_to_active(obj, ring, i915_gem_next_request_seqno(ring)); -// trace_i915_gem_object_change_domain(obj, -// obj->base.read_domains, -// old_write_domain); } } } @@ -874,59 +934,6 @@ i915_gem_process_flushing_list(struct intel_ring_buffer *ring, - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - @@ -963,15 +970,102 @@ i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj) /* If there is rendering queued on the buffer being evicted, wait for * it. 
*/ -// if (obj->active) { + if (obj->active) { // ret = i915_wait_request(obj->ring, obj->last_rendering_seqno); // if (ret) // return ret; -// } + } return 0; } +static void i915_gem_object_finish_gtt(struct drm_i915_gem_object *obj) +{ + u32 old_write_domain, old_read_domains; + + /* Act a barrier for all accesses through the GTT */ + mb(); + + /* Force a pagefault for domain tracking on next user access */ +// i915_gem_release_mmap(obj); + + if ((obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0) + return; + + old_read_domains = obj->base.read_domains; + old_write_domain = obj->base.write_domain; + obj->base.read_domains &= ~I915_GEM_DOMAIN_GTT; + obj->base.write_domain &= ~I915_GEM_DOMAIN_GTT; + +} + +/** + * Unbinds an object from the GTT aperture. + */ +int +i915_gem_object_unbind(struct drm_i915_gem_object *obj) +{ + int ret = 0; + + ENTER(); + if (obj->gtt_space == NULL) + return 0; + + if (obj->pin_count != 0) { + DRM_ERROR("Attempting to unbind pinned buffer\n"); + return -EINVAL; + } + + ret = i915_gem_object_finish_gpu(obj); + if (ret == -ERESTARTSYS) + return ret; + /* Continue on if we fail due to EIO, the GPU is hung so we + * should be safe and we need to cleanup or else we might + * cause memory corruption through use-after-free. + */ + + i915_gem_object_finish_gtt(obj); + + /* Move the object to the CPU domain to ensure that + * any possible CPU writes while it's not in the GTT + * are flushed when we go to remap it. + */ + if (ret == 0) + ret = i915_gem_object_set_to_cpu_domain(obj, 1); + if (ret == -ERESTARTSYS) + return ret; + if (ret) { + /* In the event of a disaster, abandon all caches and + * hope for the best. 
+ */ + i915_gem_clflush_object(obj); + obj->base.read_domains = obj->base.write_domain = I915_GEM_DOMAIN_CPU; + } + + /* release the fence reg _after_ flushing */ + ret = i915_gem_object_put_fence(obj); + if (ret == -ERESTARTSYS) + return ret; + + + i915_gem_gtt_unbind_object(obj); + i915_gem_object_put_pages_gtt(obj); + + list_del_init(&obj->gtt_list); + list_del_init(&obj->mm_list); + /* Avoid an unnecessary call to unbind on rebind. */ + obj->map_and_fenceable = true; + + drm_mm_put_block(obj->gtt_space); + obj->gtt_space = NULL; + obj->gtt_offset = 0; + + if (i915_gem_object_is_purgeable(obj)) + i915_gem_object_truncate(obj); + + LEAVE(); + return ret; +} int i915_gem_flush_ring(struct intel_ring_buffer *ring, @@ -983,7 +1077,6 @@ i915_gem_flush_ring(struct intel_ring_buffer *ring, if (((invalidate_domains | flush_domains) & I915_GEM_GPU_DOMAINS) == 0) return 0; -// trace_i915_gem_ring_flush(ring, invalidate_domains, flush_domains); ret = ring->flush(ring, invalidate_domains, flush_domains); if (ret) @@ -995,6 +1088,141 @@ i915_gem_flush_ring(struct intel_ring_buffer *ring, return 0; } +static int i915_ring_idle(struct intel_ring_buffer *ring) +{ + int ret; + + if (list_empty(&ring->gpu_write_list) && list_empty(&ring->active_list)) + return 0; + + if (!list_empty(&ring->gpu_write_list)) { + ret = i915_gem_flush_ring(ring, + I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS); + if (ret) + return ret; + } + + return 0; //i915_wait_request(ring, i915_gem_next_request_seqno(ring)); +} + +int +i915_gpu_idle(struct drm_device *dev) +{ + drm_i915_private_t *dev_priv = dev->dev_private; + int ret, i; + + /* Flush everything onto the inactive list. 
*/ + for (i = 0; i < I915_NUM_RINGS; i++) { + ret = i915_ring_idle(&dev_priv->ring[i]); + if (ret) + return ret; + } + + return 0; +} + + + + + + + + + + + + + + + + + + + + + + +static bool ring_passed_seqno(struct intel_ring_buffer *ring, u32 seqno) +{ + return i915_seqno_passed(ring->get_seqno(ring), seqno); +} + +static int +i915_gem_object_flush_fence(struct drm_i915_gem_object *obj, + struct intel_ring_buffer *pipelined) +{ + int ret; + + if (obj->fenced_gpu_access) { + if (obj->base.write_domain & I915_GEM_GPU_DOMAINS) { + ret = i915_gem_flush_ring(obj->last_fenced_ring, + 0, obj->base.write_domain); + if (ret) + return ret; + } + + obj->fenced_gpu_access = false; + } + + if (obj->last_fenced_seqno && pipelined != obj->last_fenced_ring) { + if (!ring_passed_seqno(obj->last_fenced_ring, + obj->last_fenced_seqno)) { +// ret = i915_wait_request(obj->last_fenced_ring, +// obj->last_fenced_seqno); +// if (ret) +// return ret; + } + + obj->last_fenced_seqno = 0; + obj->last_fenced_ring = NULL; + } + + /* Ensure that all CPU reads are completed before installing a fence + * and all writes before removing the fence. 
+ */ + if (obj->base.read_domains & I915_GEM_DOMAIN_GTT) + mb(); + + return 0; +} + +int +i915_gem_object_put_fence(struct drm_i915_gem_object *obj) +{ + int ret; + +// if (obj->tiling_mode) +// i915_gem_release_mmap(obj); + + ret = i915_gem_object_flush_fence(obj, NULL); + if (ret) + return ret; + + if (obj->fence_reg != I915_FENCE_REG_NONE) { + struct drm_i915_private *dev_priv = obj->base.dev->dev_private; + i915_gem_clear_fence_reg(obj->base.dev, + &dev_priv->fence_regs[obj->fence_reg]); + + obj->fence_reg = I915_FENCE_REG_NONE; + } + + return 0; +} + + + + + + + + + + + + + + + @@ -1164,7 +1392,7 @@ i915_gem_object_bind_to_gtt(struct drm_i915_gem_object *obj, ret = i915_gem_gtt_bind_object(obj); if (ret) { -// i915_gem_object_put_pages_gtt(obj); + i915_gem_object_put_pages_gtt(obj); drm_mm_put_block(obj->gtt_space); obj->gtt_space = NULL; @@ -1195,7 +1423,6 @@ i915_gem_object_bind_to_gtt(struct drm_i915_gem_object *obj, obj->map_and_fenceable = mappable && fenceable; -// trace_i915_gem_object_bind(obj, map_and_fenceable); return 0; } @@ -1220,12 +1447,52 @@ i915_gem_clflush_object(struct drm_i915_gem_object *obj) if (obj->cache_level != I915_CACHE_NONE) return; -// trace_i915_gem_object_clflush(obj); + if(obj->mapped != NULL) + { + uint8_t *page_virtual; + unsigned int i; -// drm_clflush_pages(obj->pages, obj->base.size / PAGE_SIZE); - mb(); - __asm__ ("wbinvd"); // this is really ugly - mb(); + page_virtual = obj->mapped; + asm volatile("mfence"); + for (i = 0; i < obj->base.size; i += x86_clflush_size) + clflush(page_virtual + i); + asm volatile("mfence"); + } + else + { + uint8_t *page_virtual; + unsigned int i; + page_virtual = AllocKernelSpace(obj->base.size); + if(page_virtual != NULL) + { + u32_t *src, *dst; + u32 count; + +#define page_tabs 0xFDC00000 /* really dirty hack */ + + src = (u32_t*)obj->pages; + dst = &((u32_t*)page_tabs)[(u32_t)page_virtual >> 12]; + count = obj->base.size/4096; + + while(count--) + { + *dst++ = (0xFFFFF000 & *src++) | 
0x001 ; + }; + + asm volatile("mfence"); + for (i = 0; i < obj->base.size; i += x86_clflush_size) + clflush(page_virtual + i); + asm volatile("mfence"); + FreeKernelSpace(page_virtual); + } + else + { + asm volatile ( + "mfence \n" + "wbinvd \n" /* this is really ugly */ + "mfence"); + } + } } /** Flushes any GPU write domain for the object if it's dirty. */ @@ -1239,10 +1506,29 @@ i915_gem_object_flush_gpu_write_domain(struct drm_i915_gem_object *obj) return i915_gem_flush_ring(obj->ring, 0, obj->base.write_domain); } +/** Flushes the GTT write domain for the object if it's dirty. */ +static void +i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj) +{ + uint32_t old_write_domain; + if (obj->base.write_domain != I915_GEM_DOMAIN_GTT) + return; + /* No actual flushing is required for the GTT write domain. Writes + * to it immediately go to main memory as far as we know, so there's + * no chipset flush. It also doesn't land in render cache. + * + * However, we do have to enforce the order so that all writes through + * the GTT land before any writes to the device, such as updates to + * the GATT itself. + */ + wmb(); + old_write_domain = obj->base.write_domain; + obj->base.write_domain = 0; +} /** Flushes the CPU write domain for the object if it's dirty. 
*/ static void @@ -1258,9 +1544,6 @@ i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj) old_write_domain = obj->base.write_domain; obj->base.write_domain = 0; -// trace_i915_gem_object_change_domain(obj, -// obj->base.read_domains, -// old_write_domain); } /** @@ -1363,10 +1646,7 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj, obj->base.read_domains = I915_GEM_DOMAIN_CPU; obj->base.write_domain = I915_GEM_DOMAIN_CPU; - trace_i915_gem_object_change_domain(obj, - old_read_domains, - old_write_domain); - } + } obj->cache_level = cache_level; return 0; @@ -1433,34 +1713,110 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_GTT) != 0); obj->base.read_domains |= I915_GEM_DOMAIN_GTT; -// trace_i915_gem_object_change_domain(obj, -// old_read_domains, -// old_write_domain); return 0; } +int +i915_gem_object_finish_gpu(struct drm_i915_gem_object *obj) +{ + int ret; + + if ((obj->base.read_domains & I915_GEM_GPU_DOMAINS) == 0) + return 0; + + if (obj->base.write_domain & I915_GEM_GPU_DOMAINS) { + ret = i915_gem_flush_ring(obj->ring, 0, obj->base.write_domain); + if (ret) + return ret; + } + + /* Ensure that we invalidate the GPU's caches and TLBs. */ + obj->base.read_domains &= ~I915_GEM_GPU_DOMAINS; + + return i915_gem_object_wait_rendering(obj); +} + +/** + * Moves a single object to the CPU read, and possibly write domain. + * + * This function returns when the move is complete, including waiting on + * flushes to occur. 
+ */ +static int +i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write) +{ + uint32_t old_write_domain, old_read_domains; + int ret; + + if (obj->base.write_domain == I915_GEM_DOMAIN_CPU) + return 0; + + ret = i915_gem_object_flush_gpu_write_domain(obj); + if (ret) + return ret; + + ret = i915_gem_object_wait_rendering(obj); + if (ret) + return ret; + + i915_gem_object_flush_gtt_write_domain(obj); + + /* If we have a partially-valid cache of the object in the CPU, + * finish invalidating it and free the per-page flags. + */ + i915_gem_object_set_to_full_cpu_read_domain(obj); + + old_write_domain = obj->base.write_domain; + old_read_domains = obj->base.read_domains; + + /* Flush the CPU cache if it's still invalid. */ + if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0) { + i915_gem_clflush_object(obj); + + obj->base.read_domains |= I915_GEM_DOMAIN_CPU; + } + + /* It should now be out of any other write domains, and we can update + * the domain values for our changes. + */ + BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_CPU) != 0); + + /* If we're writing through the CPU, then the GPU read domains will + * need to be invalidated at next use. + */ + if (write) { + obj->base.read_domains = I915_GEM_DOMAIN_CPU; + obj->base.write_domain = I915_GEM_DOMAIN_CPU; + } + return 0; +} +/** + * Moves the object from a partially CPU read to a full one. + * + * Note that this only resolves i915_gem_object_set_cpu_read_domain_range(), + * and doesn't handle transitioning from !(read_domains & I915_GEM_DOMAIN_CPU). + */ +static void +i915_gem_object_set_to_full_cpu_read_domain(struct drm_i915_gem_object *obj) +{ + if (!obj->page_cpu_valid) + return; + /* If we're partially in the CPU read domain, finish moving it in. + */ + if (obj->base.read_domains & I915_GEM_DOMAIN_CPU) { + } - - - - - - - - - - - - - - - - + /* Free the page_cpu_valid mappings which are now stale, whether + * or not we've got I915_GEM_DOMAIN_CPU. 
+ */ + kfree(obj->page_cpu_valid); + obj->page_cpu_valid = NULL; +} @@ -1495,7 +1851,6 @@ i915_gem_object_pin(struct drm_i915_gem_object *obj, int ret; BUG_ON(obj->pin_count == DRM_I915_GEM_OBJECT_MAX_PIN_COUNT); -// WARN_ON(i915_verify_lists(dev)); #if 0 if (obj->gtt_space != NULL) { @@ -1529,12 +1884,25 @@ i915_gem_object_pin(struct drm_i915_gem_object *obj, } obj->pin_mappable |= map_and_fenceable; -// WARN_ON(i915_verify_lists(dev)); return 0; } +void +i915_gem_object_unpin(struct drm_i915_gem_object *obj) +{ + struct drm_device *dev = obj->base.dev; + drm_i915_private_t *dev_priv = dev->dev_private; + BUG_ON(obj->pin_count == 0); + BUG_ON(obj->gtt_space == NULL); + if (--obj->pin_count == 0) { + if (!obj->active) + list_move_tail(&obj->mm_list, + &dev_priv->mm.inactive_list); + obj->pin_mappable = false; + } +} @@ -1619,17 +1987,56 @@ struct drm_i915_gem_object *i915_gem_alloc_object(struct drm_device *dev, return obj; } +int i915_gem_init_object(struct drm_gem_object *obj) +{ + BUG(); + + return 0; +} + +static void i915_gem_free_object_tail(struct drm_i915_gem_object *obj) +{ + struct drm_device *dev = obj->base.dev; + drm_i915_private_t *dev_priv = dev->dev_private; + int ret; + + ENTER(); + + ret = i915_gem_object_unbind(obj); + if (ret == -ERESTARTSYS) { + list_move(&obj->mm_list, + &dev_priv->mm.deferred_free_list); + return; + } +// if (obj->base.map_list.map) +// drm_gem_free_mmap_offset(&obj->base); + drm_gem_object_release(&obj->base); + i915_gem_info_remove_obj(dev_priv, obj->base.size); + kfree(obj->page_cpu_valid); + kfree(obj->bit_17); + kfree(obj); + LEAVE(); +} +void i915_gem_free_object(struct drm_gem_object *gem_obj) +{ + struct drm_i915_gem_object *obj = to_intel_bo(gem_obj); + struct drm_device *dev = obj->base.dev; + ENTER(); + while (obj->pin_count > 0) + i915_gem_object_unpin(obj); +// if (obj->phys_obj) +// i915_gem_detach_phys_object(dev, obj); - - - + i915_gem_free_object_tail(obj); + LEAVE(); +} @@ -1784,9 +2191,6 @@ 
i915_gem_load(struct drm_device *dev) init_ring_lists(&dev_priv->ring[i]); for (i = 0; i < I915_MAX_NUM_FENCES; i++) INIT_LIST_HEAD(&dev_priv->fence_regs[i].lru_list); -// INIT_DELAYED_WORK(&dev_priv->mm.retire_work, -// i915_gem_retire_work_handler); -// init_completion(&dev_priv->error_completion); /* On GEN3 we really need to make sure the ARB C3 LP bit is set */ if (IS_GEN3(dev)) { @@ -1811,7 +2215,6 @@ i915_gem_load(struct drm_device *dev) } i915_gem_detect_bit_6_swizzle(dev); -// init_waitqueue_head(&dev_priv->pending_flip_queue); dev_priv->mm.interruptible = true; diff --git a/drivers/video/drm/i915/i915_gem_gtt.c b/drivers/video/drm/i915/i915_gem_gtt.c index 5509921c2..767f9f2be 100644 --- a/drivers/video/drm/i915/i915_gem_gtt.c +++ b/drivers/video/drm/i915/i915_gem_gtt.c @@ -53,8 +53,29 @@ static unsigned int cache_level_to_agp_type(struct drm_device *dev, } } -#if 0 +static bool do_idling(struct drm_i915_private *dev_priv) +{ + bool ret = dev_priv->mm.interruptible; + if (unlikely(dev_priv->mm.gtt->do_idle_maps)) { + dev_priv->mm.interruptible = false; + if (i915_gpu_idle(dev_priv->dev)) { + DRM_ERROR("Couldn't idle GPU\n"); + /* Wait a bit, in hopes it avoids the hang */ + udelay(10); + } + } + + return ret; +} + +static void undo_idling(struct drm_i915_private *dev_priv, bool interruptible) +{ + if (unlikely(dev_priv->mm.gtt->do_idle_maps)) + dev_priv->mm.interruptible = interruptible; +} + +#if 0 void i915_gem_restore_gtt_mappings(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; @@ -123,15 +144,23 @@ void i915_gem_gtt_rebind_object(struct drm_i915_gem_object *obj, agp_type); } +#endif + void i915_gem_gtt_unbind_object(struct drm_i915_gem_object *obj) { + struct drm_device *dev = obj->base.dev; + struct drm_i915_private *dev_priv = dev->dev_private; + bool interruptible; + + interruptible = do_idling(dev_priv); + intel_gtt_clear_range(obj->gtt_space->start >> PAGE_SHIFT, obj->base.size >> PAGE_SHIFT); if (obj->sg_list) { - 
intel_gtt_unmap_memory(obj->sg_list, obj->num_sg); +// intel_gtt_unmap_memory(obj->sg_list, obj->num_sg); obj->sg_list = NULL; } -} -#endif + undo_idling(dev_priv, interruptible); +} diff --git a/drivers/video/drm/i915/intel_display.c b/drivers/video/drm/i915/intel_display.c index a8253b610..45e4cf304 100644 --- a/drivers/video/drm/i915/intel_display.c +++ b/drivers/video/drm/i915/intel_display.c @@ -2006,7 +2006,7 @@ intel_pin_and_fence_fb_obj(struct drm_device *dev, return 0; err_unpin: -// i915_gem_object_unpin(obj); + i915_gem_object_unpin(obj); err_interruptible: dev_priv->mm.interruptible = true; return ret; @@ -2223,7 +2223,7 @@ intel_pipe_set_base(struct drm_crtc *crtc, int x, int y, ret = intel_pipe_set_base_atomic(crtc, crtc->fb, x, y, LEAVE_ATOMIC_MODE_SET); if (ret) { -// i915_gem_object_unpin(to_intel_framebuffer(crtc->fb)->obj); + i915_gem_object_unpin(to_intel_framebuffer(crtc->fb)->obj); mutex_unlock(&dev->struct_mutex); DRM_ERROR("failed to update base address\n"); LEAVE(); @@ -3310,7 +3310,7 @@ static void intel_crtc_disable(struct drm_crtc *crtc) if (crtc->fb) { mutex_lock(&dev->struct_mutex); -// i915_gem_object_unpin(to_intel_framebuffer(crtc->fb)->obj); + i915_gem_object_unpin(to_intel_framebuffer(crtc->fb)->obj); mutex_unlock(&dev->struct_mutex); } } @@ -6299,7 +6299,7 @@ intel_framebuffer_create_for_mode(struct drm_device *dev, int depth, int bpp) { struct drm_i915_gem_object *obj; - struct drm_mode_fb_cmd mode_cmd; + struct drm_mode_fb_cmd2 mode_cmd; // obj = i915_gem_alloc_object(dev, // intel_framebuffer_size_for_mode(mode, bpp)); @@ -6658,8 +6658,6 @@ static void intel_increase_pllclock(struct drm_crtc *crtc) LEAVE(); /* Schedule downclock */ -// mod_timer(&intel_crtc->idle_timer, jiffies + -// msecs_to_jiffies(CRTC_IDLE_TIMEOUT)); } @@ -6892,8 +6890,6 @@ static void intel_crtc_init(struct drm_device *dev, int pipe) intel_crtc->busy = false; -// setup_timer(&intel_crtc->idle_timer, intel_crtc_idle_timer, -// (unsigned long)intel_crtc); } 
diff --git a/drivers/video/drm/i915/intel_fb.c b/drivers/video/drm/i915/intel_fb.c index ac6fcacfa..02d8b2fe8 100644 --- a/drivers/video/drm/i915/intel_fb.c +++ b/drivers/video/drm/i915/intel_fb.c @@ -144,7 +144,7 @@ static int intelfb_create(struct intel_fbdev *ifbdev, obj->gtt_space = &lfb_vm_node; obj->gtt_offset = 0; - obj->pin_count = 1; + obj->pin_count = 2; } /***********************************************************************/ @@ -200,9 +200,9 @@ static int intelfb_create(struct intel_fbdev *ifbdev, return 0; out_unpin: -// i915_gem_object_unpin(obj); + i915_gem_object_unpin(obj); out_unref: -// drm_gem_object_unreference(&obj->base); + drm_gem_object_unreference(&obj->base); mutex_unlock(&dev->struct_mutex); out: return ret; diff --git a/drivers/video/drm/i915/intel_ringbuffer.c b/drivers/video/drm/i915/intel_ringbuffer.c index 246ae653b..c18cdafaa 100644 --- a/drivers/video/drm/i915/intel_ringbuffer.c +++ b/drivers/video/drm/i915/intel_ringbuffer.c @@ -364,9 +364,9 @@ init_pipe_control(struct intel_ring_buffer *ring) return 0; err_unpin: -// i915_gem_object_unpin(obj); + i915_gem_object_unpin(obj); err_unref: -// drm_gem_object_unreference(&obj->base); + drm_gem_object_unreference(&obj->base); err: kfree(pc); return ret; @@ -383,8 +383,8 @@ cleanup_pipe_control(struct intel_ring_buffer *ring) obj = pc->obj; // kunmap(obj->pages[0]); -// i915_gem_object_unpin(obj); -// drm_gem_object_unreference(&obj->base); + i915_gem_object_unpin(obj); + drm_gem_object_unreference(&obj->base); kfree(pc); ring->private = NULL; @@ -948,8 +948,8 @@ static void cleanup_status_page(struct intel_ring_buffer *ring) return; kunmap(obj->pages[0]); -// i915_gem_object_unpin(obj); -// drm_gem_object_unreference(&obj->base); + i915_gem_object_unpin(obj); + drm_gem_object_unreference(&obj->base); ring->status_page.obj = NULL; memset(&dev_priv->hws_map, 0, sizeof(dev_priv->hws_map)); @@ -992,9 +992,9 @@ static int init_status_page(struct intel_ring_buffer *ring) return 0; 
err_unpin: - // i915_gem_object_unpin(obj); + i915_gem_object_unpin(obj); err_unref: - // drm_gem_object_unreference(&obj->base); + drm_gem_object_unreference(&obj->base); err: return ret; } @@ -1065,12 +1065,11 @@ int intel_init_ring_buffer(struct drm_device *dev, return 0; err_unmap: -// drm_core_ioremapfree(&ring->map, dev); FreeKernelSpace(ring->virtual_start); err_unpin: -// i915_gem_object_unpin(obj); + i915_gem_object_unpin(obj); err_unref: -// drm_gem_object_unreference(&obj->base); + drm_gem_object_unreference(&obj->base); ring->obj = NULL; err_hws: // cleanup_status_page(ring); @@ -1096,8 +1095,8 @@ void intel_cleanup_ring_buffer(struct intel_ring_buffer *ring) // drm_core_ioremapfree(&ring->map, ring->dev); -// i915_gem_object_unpin(ring->obj); -// drm_gem_object_unreference(&ring->obj->base); + i915_gem_object_unpin(ring->obj); + drm_gem_object_unreference(&ring->obj->base); ring->obj = NULL; if (ring->cleanup) @@ -1156,7 +1155,7 @@ int intel_wait_ring_buffer(struct intel_ring_buffer *ring, int n) }; } -// trace_i915_ring_wait_begin(ring); + end = jiffies + 3 * HZ; do { ring->head = I915_READ_HEAD(ring); @@ -1174,7 +1173,6 @@ int intel_wait_ring_buffer(struct intel_ring_buffer *ring, int n) return -EAGAIN; }; } while (!time_after(jiffies, end)); -// trace_i915_ring_wait_end(ring); LEAVE(); return -EBUSY; @@ -1411,21 +1409,26 @@ static int blt_ring_init(struct intel_ring_buffer *ring) ret = i915_gem_object_pin(obj, 4096, true); if (ret) { -// drm_gem_object_unreference(&obj->base); + drm_gem_object_unreference(&obj->base); return ret; } - ptr = ioremap(obj->pages[0], 4096); + ptr = MapIoMem(obj->pages[0], 4096, PG_SW); + obj->mapped = ptr; + *ptr++ = MI_BATCH_BUFFER_END; *ptr++ = MI_NOOP; -// iounmap(obj->pages[0]); ret = i915_gem_object_set_to_gtt_domain(obj, false); if (ret) { -// i915_gem_object_unpin(obj); -// drm_gem_object_unreference(&obj->base); + i915_gem_object_unpin(obj); + drm_gem_object_unreference(&obj->base); + FreeKernelSpace(ptr); + 
obj->mapped = NULL; return ret; } + FreeKernelSpace(ptr); + obj->mapped = NULL; ring->private = obj; } diff --git a/drivers/video/drm/i915/intel_sprite.c b/drivers/video/drm/i915/intel_sprite.c index dec108ffb..2288abf88 100644 --- a/drivers/video/drm/i915/intel_sprite.c +++ b/drivers/video/drm/i915/intel_sprite.c @@ -501,7 +501,7 @@ intel_update_plane(struct drm_plane *plane, struct drm_crtc *crtc, intel_wait_for_vblank(dev, to_intel_crtc(crtc)->pipe); mutex_lock(&dev->struct_mutex); } -// i915_gem_object_unpin(old_obj); + i915_gem_object_unpin(old_obj); } out_unlock: @@ -528,7 +528,7 @@ intel_disable_plane(struct drm_plane *plane) goto out; mutex_lock(&dev->struct_mutex); -// i915_gem_object_unpin(intel_plane->obj); + i915_gem_object_unpin(intel_plane->obj); intel_plane->obj = NULL; mutex_unlock(&dev->struct_mutex); out: diff --git a/drivers/video/drm/i915/kms_display.c b/drivers/video/drm/i915/kms_display.c index a6eab143f..e0325201a 100644 --- a/drivers/video/drm/i915/kms_display.c +++ b/drivers/video/drm/i915/kms_display.c @@ -428,7 +428,7 @@ int init_cursor(cursor_t *cursor) ret = i915_gem_object_pin(obj, CURSOR_WIDTH*CURSOR_HEIGHT*4, true); if (ret) { -// drm_gem_object_unreference(&obj->base); + drm_gem_object_unreference(&obj->base); return ret; } @@ -440,8 +440,8 @@ int init_cursor(cursor_t *cursor) if (unlikely(bits == NULL)) { -// i915_gem_object_unpin(obj); -// drm_gem_object_unreference(&obj->base); + i915_gem_object_unpin(obj); + drm_gem_object_unreference(&obj->base); return -ENOMEM; }; cursor->cobj = obj; @@ -807,7 +807,7 @@ int blit_video(u32 hbitmap, int dst_x, int dst_y, if( n & 1) b[n++] = MI_NOOP; -// i915_gem_object_set_to_gtt_domain(obj, false); + i915_gem_object_set_to_gtt_domain(bitmap->obj, false); if (HAS_BLT(main_device)) ring = &dev_priv->ring[BCS]; @@ -816,9 +816,11 @@ int blit_video(u32 hbitmap, int dst_x, int dst_y, ring->dispatch_execbuffer(ring, cmd_offset, n*4); - intel_ring_begin(ring, 4); -// if (ret) -// return ret; + int ret; 
+ + ret = intel_ring_begin(ring, 4); + if (ret) + return ret; intel_ring_emit(ring, MI_FLUSH_DW); intel_ring_emit(ring, 0); diff --git a/drivers/video/drm/i915/main.c b/drivers/video/drm/i915/main.c index 43040f042..7e2ab4e26 100644 --- a/drivers/video/drm/i915/main.c +++ b/drivers/video/drm/i915/main.c @@ -14,6 +14,8 @@ #include "bitmap.h" +void cpu_detect(); + void parse_cmdline(char *cmdline, char *log); int _stdcall display_handler(ioctl_t *io); int init_agp(void); @@ -24,6 +26,8 @@ int video_blit(uint64_t src_offset, int x, int y, static char log[256]; +int x86_clflush_size; + int i915_modeset = 1; u32_t drvEntry(int action, char *cmdline) @@ -71,15 +75,20 @@ u32_t drvEntry(int action, char *cmdline) return err; }; -#define API_VERSION 0x01000100 +#define CURRENT_API 0x0200 /* 2.00 */ +#define COMPATIBLE_API 0x0100 /* 1.00 */ -#define SRV_GETVERSION 0 -#define SRV_ENUM_MODES 1 -#define SRV_SET_MODE 2 +#define API_VERSION (COMPATIBLE_API << 16) | CURRENT_API +#define DISPLAY_VERSION CURRENT_API -#define SRV_CREATE_BITMAP 10 -#define SRV_BLIT_VIDEO 20 +#define SRV_GETVERSION 0 +#define SRV_ENUM_MODES 1 +#define SRV_SET_MODE 2 + +#define SRV_CREATE_SURFACE 10 + +#define SRV_BLIT_VIDEO 20 #define check_input(size) \ if( unlikely((inp==NULL)||(io->inp_size != (size))) ) \ @@ -102,7 +111,7 @@ int _stdcall display_handler(ioctl_t *io) { case SRV_GETVERSION: check_output(4); - *outp = API_VERSION; + *outp = DISPLAY_VERSION; retval = 0; break; @@ -123,9 +132,9 @@ int _stdcall display_handler(ioctl_t *io) retval = set_user_mode((videomode_t*)inp); break; - case SRV_CREATE_BITMAP: - check_input(5); - retval = create_bitmap((struct ubitmap*)inp); + case SRV_CREATE_SURFACE: +// check_input(8); + retval = create_surface((struct io_call_10*)inp); break; @@ -202,3 +211,37 @@ void parse_cmdline(char *cmdline, char *log) }; }; +static inline void __cpuid(unsigned int *eax, unsigned int *ebx, + unsigned int *ecx, unsigned int *edx) +{ + /* ecx is often an input as well as an 
output. */ + asm volatile( + "cpuid" + : "=a" (*eax), + "=b" (*ebx), + "=c" (*ecx), + "=d" (*edx) + : "0" (*eax), "2" (*ecx)); +} + +static inline void cpuid(unsigned int op, + unsigned int *eax, unsigned int *ebx, + unsigned int *ecx, unsigned int *edx) +{ + *eax = op; + *ecx = 0; + __cpuid(eax, ebx, ecx, edx); +} + +void cpu_detect() +{ + u32 junk, tfms, cap0, misc; + + cpuid(0x00000001, &tfms, &misc, &junk, &cap0); + + if (cap0 & (1<<19)) + { + x86_clflush_size = ((misc >> 8) & 0xff) * 8; + } +} +