|
28 | 28 | *
|
29 | 29 | * Also need to add code to deal with cards endians that are different than
|
30 | 30 | * the native cpu endians. I also need to deal with MSB position in the word.
|
| 31 | + * Modified by Harm Hanemaaijer ([email protected]) 2013: |
| 32 | + * - Provide optimized versions of fast_imageblit for 16 and 32bpp that are |
| 33 | + * significantly faster than the previous implementation. |
| 34 | + * - Simplify the fast/slow_imageblit selection code, avoiding integer |
| 35 | + * divides. |
31 | 36 | */
|
32 | 37 | #include <linux/module.h>
|
33 | 38 | #include <linux/string.h>
|
@@ -262,6 +267,133 @@ static inline void fast_imageblit(const struct fb_image *image, struct fb_info *
|
262 | 267 | }
|
263 | 268 | }
|
264 | 269 |
|
| 270 | +/* |
| 271 | + * Optimized fast_imageblit for bpp == 16. ppw = 2, bit_mask = 3 folded |
| 272 | + * into the code, main loop unrolled. |
| 273 | + */ |
| 274 | + |
| 275 | +static inline void fast_imageblit16(const struct fb_image *image, |
| 276 | + struct fb_info *p, u8 __iomem * dst1, |
| 277 | + u32 fgcolor, u32 bgcolor) |
| 278 | +{ |
| 279 | + u32 fgx = fgcolor, bgx = bgcolor; |
| 280 | + u32 spitch = (image->width + 7) / 8; |
| 281 | + u32 end_mask, eorx; |
| 282 | + const char *s = image->data, *src; |
| 283 | + u32 __iomem *dst; |
| 284 | + const u32 *tab = NULL; |
| 285 | + int i, j, k; |
| 286 | + |
| 287 | + tab = fb_be_math(p) ? cfb_tab16_be : cfb_tab16_le; |
| 288 | + |
| 289 | + fgx <<= 16; |
| 290 | + bgx <<= 16; |
| 291 | + fgx |= fgcolor; |
| 292 | + bgx |= bgcolor; |
| 293 | + |
| 294 | + eorx = fgx ^ bgx; |
| 295 | + k = image->width / 2; |
| 296 | + |
| 297 | + for (i = image->height; i--;) { |
| 298 | + dst = (u32 __iomem *) dst1; |
| 299 | + src = s; |
| 300 | + |
| 301 | + j = k; |
| 302 | + while (j >= 4) { |
| 303 | + u8 bits = *src; |
| 304 | + end_mask = tab[(bits >> 6) & 3]; |
| 305 | + FB_WRITEL((end_mask & eorx) ^ bgx, dst++); |
| 306 | + end_mask = tab[(bits >> 4) & 3]; |
| 307 | + FB_WRITEL((end_mask & eorx) ^ bgx, dst++); |
| 308 | + end_mask = tab[(bits >> 2) & 3]; |
| 309 | + FB_WRITEL((end_mask & eorx) ^ bgx, dst++); |
| 310 | + end_mask = tab[bits & 3]; |
| 311 | + FB_WRITEL((end_mask & eorx) ^ bgx, dst++); |
| 312 | + src++; |
| 313 | + j -= 4; |
| 314 | + } |
| 315 | + if (j != 0) { |
| 316 | + u8 bits = *src; |
| 317 | + end_mask = tab[(bits >> 6) & 3]; |
| 318 | + FB_WRITEL((end_mask & eorx) ^ bgx, dst++); |
| 319 | + if (j >= 2) { |
| 320 | + end_mask = tab[(bits >> 4) & 3]; |
| 321 | + FB_WRITEL((end_mask & eorx) ^ bgx, dst++); |
| 322 | + if (j == 3) { |
| 323 | + end_mask = tab[(bits >> 2) & 3]; |
| 324 | + FB_WRITEL((end_mask & eorx) ^ bgx, dst); |
| 325 | + } |
| 326 | + } |
| 327 | + } |
| 328 | + dst1 += p->fix.line_length; |
| 329 | + s += spitch; |
| 330 | + } |
| 331 | +} |
| 332 | + |
| 333 | +/* |
| 334 | + * Optimized fast_imageblit for bpp == 32. ppw = 1, bit_mask = 1 folded |
| 335 | + * into the code, main loop unrolled. |
| 336 | + */ |
| 337 | + |
| 338 | +static inline void fast_imageblit32(const struct fb_image *image, |
| 339 | + struct fb_info *p, u8 __iomem * dst1, |
| 340 | + u32 fgcolor, u32 bgcolor) |
| 341 | +{ |
| 342 | + u32 fgx = fgcolor, bgx = bgcolor; |
| 343 | + u32 spitch = (image->width + 7) / 8; |
| 344 | + u32 end_mask, eorx; |
| 345 | + const char *s = image->data, *src; |
| 346 | + u32 __iomem *dst; |
| 347 | + const u32 *tab = NULL; |
| 348 | + int i, j, k; |
| 349 | + |
| 350 | + tab = cfb_tab32; |
| 351 | + |
| 352 | + eorx = fgx ^ bgx; |
| 353 | + k = image->width; |
| 354 | + |
| 355 | + for (i = image->height; i--;) { |
| 356 | + dst = (u32 __iomem *) dst1; |
| 357 | + src = s; |
| 358 | + |
| 359 | + j = k; |
| 360 | + while (j >= 8) { |
| 361 | + u8 bits = *src; |
| 362 | + end_mask = tab[(bits >> 7) & 1]; |
| 363 | + FB_WRITEL((end_mask & eorx) ^ bgx, dst++); |
| 364 | + end_mask = tab[(bits >> 6) & 1]; |
| 365 | + FB_WRITEL((end_mask & eorx) ^ bgx, dst++); |
| 366 | + end_mask = tab[(bits >> 5) & 1]; |
| 367 | + FB_WRITEL((end_mask & eorx) ^ bgx, dst++); |
| 368 | + end_mask = tab[(bits >> 4) & 1]; |
| 369 | + FB_WRITEL((end_mask & eorx) ^ bgx, dst++); |
| 370 | + end_mask = tab[(bits >> 3) & 1]; |
| 371 | + FB_WRITEL((end_mask & eorx) ^ bgx, dst++); |
| 372 | + end_mask = tab[(bits >> 2) & 1]; |
| 373 | + FB_WRITEL((end_mask & eorx) ^ bgx, dst++); |
| 374 | + end_mask = tab[(bits >> 1) & 1]; |
| 375 | + FB_WRITEL((end_mask & eorx) ^ bgx, dst++); |
| 376 | + end_mask = tab[bits & 1]; |
| 377 | + FB_WRITEL((end_mask & eorx) ^ bgx, dst++); |
| 378 | + src++; |
| 379 | + j -= 8; |
| 380 | + } |
| 381 | + if (j != 0) { |
| 382 | + u32 bits = (u32) * src; |
| 383 | + while (j > 1) { |
| 384 | + end_mask = tab[(bits >> 7) & 1]; |
| 385 | + FB_WRITEL((end_mask & eorx) ^ bgx, dst++); |
| 386 | + bits <<= 1; |
| 387 | + j--; |
| 388 | + } |
| 389 | + end_mask = tab[(bits >> 7) & 1]; |
| 390 | + FB_WRITEL((end_mask & eorx) ^ bgx, dst); |
| 391 | + } |
| 392 | + dst1 += p->fix.line_length; |
| 393 | + s += spitch; |
| 394 | + } |
| 395 | +} |
| 396 | + |
265 | 397 | void cfb_imageblit(struct fb_info *p, const struct fb_image *image)
|
266 | 398 | {
|
267 | 399 | u32 fgcolor, bgcolor, start_index, bitstart, pitch_index = 0;
|
@@ -294,11 +426,21 @@ void cfb_imageblit(struct fb_info *p, const struct fb_image *image)
|
294 | 426 | bgcolor = image->bg_color;
|
295 | 427 | }
|
296 | 428 |
|
297 |
| - if (32 % bpp == 0 && !start_index && !pitch_index && |
298 |
| - ((width & (32/bpp-1)) == 0) && |
299 |
| - bpp >= 8 && bpp <= 32) |
300 |
| - fast_imageblit(image, p, dst1, fgcolor, bgcolor); |
301 |
| - else |
| 429 | + if (!start_index && !pitch_index) { |
| 430 | + if (bpp == 32) |
| 431 | + fast_imageblit32(image, p, dst1, fgcolor, |
| 432 | + bgcolor); |
| 433 | + else if (bpp == 16 && (width & 1) == 0) |
| 434 | + fast_imageblit16(image, p, dst1, fgcolor, |
| 435 | + bgcolor); |
| 436 | + else if (bpp == 8 && (width & 3) == 0) |
| 437 | + fast_imageblit(image, p, dst1, fgcolor, |
| 438 | + bgcolor); |
| 439 | + else |
| 440 | + slow_imageblit(image, p, dst1, fgcolor, |
| 441 | + bgcolor, |
| 442 | + start_index, pitch_index); |
| 443 | + } else |
302 | 444 | slow_imageblit(image, p, dst1, fgcolor, bgcolor,
|
303 | 445 | start_index, pitch_index);
|
304 | 446 | } else
|
|
0 commit comments