/* Copyright (C) 1994, MPEG Software Simulation Group. All Rights Reserved. */

/*
 * Disclaimer of Warranty
 *
 * These software programs are available to the user without any license fee or
 * royalty on an "as is" basis.  The MPEG Software Simulation Group disclaims
 * any and all warranties, whether express, implied, or statuary, including any
 * implied warranties or merchantability or of fitness for a particular
 * purpose.  In no event shall the copyright-holder be liable for any
 * incidental, punitive, or consequential damages of any kind whatsoever
 * arising from the use of these programs.
 *
 * This disclaimer of warranty extends to the user of these programs and user's
 * customers, employees, agents, transferees, successors, and assigns.
 *
 * The MPEG Software Simulation Group does not represent or warrant that the
 * programs furnished hereunder are free of infringement of any third-party
 * patents.
 *
 * Commercial implementations of MPEG-1 and MPEG-2 video, including shareware,
 * are subject to royalty fees to patent holders.  Many of these patents are
 * general enough such that they are unavoidable regardless of implementation
 * design.
 *
 */


 /* these routines are closely modeled after those from
  * mpeg_play 2.0 by the Berkeley Plateau Research Group
  */

#ifdef DISPLAY

#include <malloc.h>
#include <stdio.h>
#include <stdlib.h>

#include "config.h"
#include "global.h"

/* private prototypes */
/* 8 bit (ordered dither) renderers: one for progressive frames, and
 * top/bottom field renderers in a generic and a 4:2:0 flavour.  The field
 * renderers write into the buffer passed as dst.
 */
static void ditherframe _ANSI_ARGS_((unsigned char *src[]));
static void dithertop _ANSI_ARGS_((unsigned char *src[], unsigned char *dst));
static void ditherbot _ANSI_ARGS_((unsigned char *src[], unsigned char *dst));
static void dithertop420 _ANSI_ARGS_((unsigned char *src[], unsigned char *dst));
static void ditherbot420 _ANSI_ARGS_((unsigned char *src[], unsigned char *dst));
/* 24 bit renderers and the set-up routine for their lookup tables */
static void ditherframeRGB _ANSI_ARGS_((unsigned char *src[]));
static void dithertopRGB _ANSI_ARGS_((unsigned char *src[], unsigned char *dst));
static void ditherbotRGB _ANSI_ARGS_((unsigned char *src[], unsigned char *dst));
static void init_ditherRGB _ANSI_ARGS_((void));

/* local data */

/* output buffers allocated by init_dither(): dithered_image is what dither()
 * hands to display_image(); dithered_image2 receives the second field of an
 * interlaced picture and is shown by display_second_field()
 */
static unsigned char *dithered_image, *dithered_image2;

/* lookup tables filled by init_dither().  Indices are the sample value plus
 * a dither threshold (0..15); chroma samples are halved before lookup.
 * ytab yields a level 1..14 in bits 4-7, utab a level 0..3 in bits 2-3 and
 * vtab a level 0..3 in bits 0-1, so OR-ing the three gives a pixel[] index.
 */
static unsigned char ytab[256+16];
static unsigned char utab[128+16];
static unsigned char vtab[128+16];
/* maps the combined index to the output byte (identity after init_dither) */
static unsigned char pixel[256];

/* renderer signatures; the active renderers are selected in init_dither() */
typedef void (*DITHER_FRAME_FUNC)(unsigned char **);
typedef void (*DITHER_FIELD_FUNC)(unsigned char **,unsigned char *);
static DITHER_FRAME_FUNC dither_frame;
static DITHER_FIELD_FUNC dither_even, dither_odd;

//888888888888888888888888888888888888

/* color space conversion coefficients
 *
 * entries are {crv,cbu,cgu,cgv}
 *
 * crv=(255/224)*65536*(1-cr)/0.5
 * cbu=(255/224)*65536*(1-cb)/0.5
 * cgu=(255/224)*65536*(cb/cg)*(1-cb)/0.5
 * cgv=(255/224)*65536*(cr/cg)*(1-cr)/0.5
 *
 * where Y=cr*R+cg*G+cb*B (cr+cg+cb=1)
 */

/* one row of {crv,cbu,cgu,cgv} per colour matrix; the row comments name the
 * matrix each row belongs to.  Nothing in this file reads the table.
 * NOTE(review): presumably indexed by the stream's matrix_coefficients value
 * from another module -- confirm against the callers.
 */
int convmat[8][4] = {
  {117504, 138453, 13954, 34903}, /* no sequence_display_extension */
  {117504, 138453, 13954, 34903}, /* ITU-R Rec. 709 (1990) */
  {104597, 132201, 25675, 53279}, /* unspecified */
  {104597, 132201, 25675, 53279}, /* reserved */
  {104448, 132798, 24759, 53109}, /* FCC */
  {104597, 132201, 25675, 53279}, /* ITU-R Rec. 624-4 System B, G */
  {104597, 132201, 25675, 53279}, /* SMPTE 170M */
  {117579, 136230, 16907, 35559}  /* SMPTE 240M (1987) */
};

/* 4x4 ordered dither
 *
 * threshold pattern:
 *   0  8  2 10
 *  12  4 14  6
 *   3 11  1  9
 *  15  7 13  5
 */

void dither(src)
unsigned char *src[];
{
  if (prog_seq)
     dither_frame(src);
  else
  {
    if ((pict_struct==FRAME_PICTURE && topfirst) || pict_struct==BOTTOM_FIELD)
    {
      /* top field first */
      dither_even(src,dithered_image);
      dither_odd (src,dithered_image2);
    }
    else
    {
      /* bottom field first */
      dither_odd (src,dithered_image);
      dither_even(src,dithered_image2);
    }
  }
  display_image(dithered_image);
}

/*
 * Ordered-dither a whole frame into dithered_image (one byte per pixel).
 *
 * src[0] is read as the luma plane, src[1] and src[2] as the two chroma
 * planes.  Pixels are handled in groups of four columns and four lines so
 * that every pixel gets the threshold of its position in the 4x4 matrix.
 * A fresh chroma pair is fetched for every pixel in 4:4:4 and for every
 * second pixel otherwise; in 4:2:0 each chroma line is used for two
 * consecutive luma lines.  With _WIN32 the output is written bottom-up.
 */
static void ditherframe(src)
unsigned char *src[];
{
  /* dither thresholds, indexed [line mod 4][column mod 4] */
  static const unsigned char thresh[4][4] = {
    { 0,  8,  2, 10},
    {12,  4, 14,  6},
    { 3, 11,  1,  9},
    {15,  7, 13,  5}
  };
  const unsigned char *t;
  unsigned char *lum,*cb,*cr,*out;
  int row,col,line,k;
  int yy;
  int uu = 0;
  int vv = 0;

  lum = src[0];
  cb  = src[1];
  cr  = src[2];

#ifdef _WIN32
  /* start at the last buffer row and work upwards */
  out = dithered_image+(coded_picture_height-1)*coded_picture_width;
#else
  out = dithered_image;
#endif

  for (row=0; row<coded_picture_height; row+=4)
  {
    for (line=0; line<4; line++)
    {
      t = thresh[line];

      for (col=0; col<coded_picture_width; col+=4)
      {
        for (k=0; k<4; k++)
        {
          yy = *lum++;

          /* chroma advances on even columns, or always in 4:4:4 */
          if (!(k&1) || chroma_format==CHROMA444)
          {
            uu = *cb++ >> 1;
            vv = *cr++ >> 1;
          }

          *out++ = pixel[ytab[yy+t[k]]|utab[uu+t[k]]|vtab[vv+t[k]]];
        }
      }

      /* 4:2:0: rewind so the following (odd) line reuses this chroma line */
      if (!(line&1) && chroma_format==CHROMA420)
      {
        cb -= chrom_width;
        cr -= chrom_width;
      }

#ifdef _WIN32
      /* back over the row just written plus one more row */
      out -= 2*coded_picture_width;
#endif
    }
  }
}

/*
 * Ordered-dither the top field of src[] into dst as a full-height picture.
 *
 * Every field line is written unchanged to one output row (via dst) and the
 * row below it (via dst2) gets the average of that line and the next line
 * of the same field.  For the last field line the "next" line is the line
 * itself.  Chroma is not interpolated; in 4:2:0 one chroma line serves two
 * consecutive field lines.  With _WIN32 the output is written bottom-up.
 */
static void dithertop(src,dst)
unsigned char *src[];
unsigned char *dst;
{
  /* thresholds of the rows holding real field lines, per pass */
  static const unsigned char line_thresh[2][4] = {
    { 0,  8,  2, 10},
    { 3, 11,  1,  9}
  };
  /* thresholds of the interpolated rows, per pass */
  static const unsigned char mid_thresh[2][4] = {
    {12,  4, 14,  6},
    {15,  7, 13,  5}
  };
  const unsigned char *tl,*tm;
  unsigned char *cur,*nxt,*cb,*cr,*dst2;
  int row,col,pass,k;
  int a,b,avg;
  int uu = 0;
  int vv = 0;

  cur = src[0];
  nxt = src[0] + (coded_picture_width<<1);
  cb  = src[1];
  cr  = src[2];

#ifdef _WIN32
  dst += (coded_picture_height-1)*coded_picture_width;
  dst2 = dst - coded_picture_width;
#else
  dst2 = dst + coded_picture_width;
#endif

  for (row=0; row<coded_picture_height; row+=4)
  {
    /* pass 0: output lines row+0, row+1; pass 1: output lines row+2, row+3 */
    for (pass=0; pass<2; pass++)
    {
      tl = line_thresh[pass];
      tm = mid_thresh[pass];

      for (col=0; col<coded_picture_width; col+=4)
      {
        for (k=0; k<4; k++)
        {
          a = *cur++;
          b = *nxt++;

          /* chroma advances on even columns, or always in 4:4:4 */
          if (!(k&1) || chroma_format==CHROMA444)
          {
            uu = *cb++ >> 1;
            vv = *cr++ >> 1;
          }

          avg = (a+b)>>1;
          *dst++  = pixel[ytab[a+tl[k]]|utab[uu+tl[k]]|vtab[vv+tl[k]]];
          *dst2++ = pixel[ytab[avg+tm[k]]|utab[uu+tm[k]]|vtab[vv+tm[k]]];
        }
      }

      /* skip the line of the other field */
      cur += coded_picture_width;

      /* before the last field line is processed, make "next" coincide
       * with it instead of running past the end of the picture
       */
      if (pass==0 && row==(coded_picture_height-4))
        nxt -= coded_picture_width;
      else
        nxt += coded_picture_width;

#ifdef _WIN32
      dst  = dst2 - 2*coded_picture_width;
      dst2 = dst  - coded_picture_width;
#else
      dst += coded_picture_width;
      dst2+= coded_picture_width;
#endif

      /* 4:2:0 reuses the chroma line for the second field line of the
       * group; otherwise step over the chroma line of the other field
       */
      if (pass==0 && chroma_format==CHROMA420)
      {
        cb -= chrom_width;
        cr -= chrom_width;
      }
      else
      {
        cb += chrom_width;
        cr += chrom_width;
      }
    }
  }
}

/*
 * Ordered-dither the bottom field of src[] into dst as a full-height picture.
 *
 * Every field line is written unchanged to the lower row of an output row
 * pair (via dst2); the upper row (via dst) gets the average of that line
 * and the previous line of the same field.  For the first field line the
 * "previous" line is the line itself.  Chroma is not interpolated; in 4:2:0
 * one chroma line serves two consecutive field lines.  With _WIN32 the
 * output is written bottom-up.
 *
 * Every pixel of the upper row uses the interpolated luma.  (An earlier
 * version dithered the raw previous-line sample for the second pixel of
 * each group of four in the first row pair.)
 */
static void ditherbot(src,dst)
unsigned char *src[];
unsigned char *dst;
{
  /* thresholds of the interpolated (upper) rows, per pass */
  static const unsigned char mid_thresh[2][4] = {
    { 0,  8,  2, 10},
    { 3, 11,  1,  9}
  };
  /* thresholds of the rows holding real field lines, per pass */
  static const unsigned char line_thresh[2][4] = {
    {12,  4, 14,  6},
    {15,  7, 13,  5}
  };
  const unsigned char *tm,*tl;
  unsigned char *py,*py2,*pu,*pv,*dst2;
  int i,j,pass,k;
  int y,y2,avg;
  int u = 0;
  int v = 0;

  /* py: previous field line (initially the first one), py2: current one */
  py = src[0] + coded_picture_width;
  py2 = py;
  pu = src[1] + chrom_width;
  pv = src[2] + chrom_width;
#ifdef _WIN32
  dst += (coded_picture_height-1)*coded_picture_width;
  dst2 = dst - coded_picture_width;
#else
  dst2 = dst + coded_picture_width;
#endif

  for (j=0; j<coded_picture_height; j+=4)
  {
    /* pass 0: output lines j+0, j+1; pass 1: output lines j+2, j+3 */
    for (pass=0; pass<2; pass++)
    {
      tm = mid_thresh[pass];
      tl = line_thresh[pass];

      for (i=0; i<coded_picture_width; i+=4)
      {
        for (k=0; k<4; k++)
        {
          y = *py++;
          y2 = *py2++;

          /* chroma advances on even columns, or always in 4:4:4 */
          if (!(k&1) || chroma_format==CHROMA444)
          {
            u = *pu++ >> 1;
            v = *pv++ >> 1;
          }

          avg = (y+y2)>>1;
          *dst++  = pixel[ytab[avg+tm[k]]|utab[u+tm[k]]|vtab[v+tm[k]]];
          *dst2++ = pixel[ytab[y2+tl[k]]|utab[u+tl[k]]|vtab[v+tl[k]]];
        }
      }

      /* after the very first row pair the "previous" pointer goes back to
       * the first field line so that it trails py2 by one field line
       */
      if (pass==0 && j==0)
        py -= coded_picture_width;
      else
        py += coded_picture_width;

      py2 += coded_picture_width;

#ifdef _WIN32
      dst  = dst2 - 2*coded_picture_width;
      dst2 = dst  - coded_picture_width;
#else
      dst += coded_picture_width;
      dst2+= coded_picture_width;
#endif

      /* 4:2:0 reuses the chroma line for the second field line of the
       * group; otherwise step over the chroma line of the other field
       */
      if (pass==0 && chroma_format==CHROMA420)
      {
        pu -= chrom_width;
        pv -= chrom_width;
      }
      else
      {
        pu += chrom_width;
        pv += chrom_width;
      }
    }
  }
}

/*
 * Higher quality top field renderer (selected by init_dither() for 4:2:0
 * material when hiQdither is set).
 *
 * Writes a full-height 8 bit picture to dst.  Both rows of every output row
 * pair are vertical blends of two consecutive lines of the field: luma uses
 * the weights 3:1 (dst) and 1:3 (dst2); the chroma weights depend on the
 * position of the row within the group of four.  With _WIN32 the output is
 * written bottom-up.
 */
static void dithertop420(src,dst)
unsigned char *src[];
unsigned char *dst;
{
  int i,j;
  int y1,u1,v1,y2,u2,v2;
  unsigned char *py1,*pu1,*pv1,*py2,*pu2,*pv2,*dst2;

  /* ..1 pointers: current line of the field */
  py1 = src[0];
  pu1 = src[1];
  pv1 = src[2];

  /* ..2 pointers: two lines further down, i.e. the next line of the field */
  py2 = py1 + (coded_picture_width<<1);
  pu2 = pu1 + (chrom_width<<1);
  pv2 = pv1 + (chrom_width<<1);

#ifdef _WIN32
  dst += (coded_picture_height-1)*coded_picture_width;
  dst2 = dst - coded_picture_width;
#else
  dst2 = dst + coded_picture_width;
#endif

  for (j=0; j<coded_picture_height; j+=4)
  {
    /* line j + 0, j + 1 */
    for (i=0; i<coded_picture_width; i+=4)
    {
      /* one chroma pair is fetched per two pixels */
      y1 = *py1++;
      y2 = *py2++;
      u1 = *pu1++ >> 1;
      v1 = *pv1++ >> 1;
      u2 = *pu2++ >> 1;
      v2 = *pv2++ >> 1;
      *dst++  = pixel[ytab[((3*y1+y2)>>2)]|utab[u1]|vtab[v1]];
      *dst2++ = pixel[ytab[((y1+3*y2)>>2)+12]|utab[((3*u1+u2)>>2)+12]
                                             |vtab[((3*v1+v2)>>2)+12]];

      y1 = *py1++;
      y2 = *py2++;
      *dst++  = pixel[ytab[((3*y1+y2)>>2)+8]|utab[u1+8]|vtab[v1+8]];
      *dst2++ = pixel[ytab[((y1+3*y2)>>2)+4]|utab[((3*u1+u2)>>2)+4]
                                            |vtab[((3*v1+v2)>>2)+4]];

      y1 = *py1++;
      y2 = *py2++;
      u1 = *pu1++ >> 1;
      v1 = *pv1++ >> 1;
      u2 = *pu2++ >> 1;
      v2 = *pv2++ >> 1;
      *dst++  = pixel[ytab[((3*y1+y2)>>2)+2]|utab[u1+2]|vtab[v1+2]];
      *dst2++ = pixel[ytab[((y1+3*y2)>>2)+14]|utab[((3*u1+u2)>>2)+14]
                                             |vtab[((3*v1+v2)>>2)+14]];

      y1 = *py1++;
      y2 = *py2++;
      *dst++  = pixel[ytab[((3*y1+y2)>>2)+10]|utab[u1+10]|vtab[v1+10]];
      *dst2++ = pixel[ytab[((y1+3*y2)>>2)+6]|utab[((3*u1+u2)>>2)+6]
                                            |vtab[((3*v1+v2)>>2)+6]];
    }

    /* skip the luma line of the other field */
    py1 += coded_picture_width;

    /* in the last group py2 steps back so that it does not leave the picture */
    if (j!=(coded_picture_height-4))
      py2 += coded_picture_width;
    else
      py2 -= coded_picture_width;

    /* the same two chroma lines are read again for lines j + 2, j + 3 */
    pu1 -= chrom_width;
    pv1 -= chrom_width;
    pu2 -= chrom_width;
    pv2 -= chrom_width;

#ifdef _WIN32
    dst  = dst2 - 2*coded_picture_width;
    dst2 = dst  - coded_picture_width;
#else
    dst += coded_picture_width;
    dst2+= coded_picture_width;
#endif

    /* line j + 2, j + 3 */
    for (i=0; i<coded_picture_width; i+=4)
    {
      y1 = *py1++;
      y2 = *py2++;
      u1 = *pu1++ >> 1;
      v1 = *pv1++ >> 1;
      u2 = *pu2++ >> 1;
      v2 = *pv2++ >> 1;
      *dst++  = pixel[ytab[((3*y1+y2)>>2)+3]|utab[((u1+u2)>>1)+3]
                                            |vtab[((v1+v2)>>1)+3]];
      *dst2++ = pixel[ytab[((y1+3*y2)>>2)+15]|utab[((u1+3*u2)>>2)+15]
                                             |vtab[((v1+3*v2)>>2)+15]];

      y1 = *py1++;
      y2 = *py2++;
      *dst++  = pixel[ytab[((3*y1+y2)>>2)+11]|utab[((u1+u2)>>1)+11]
                                             |vtab[((v1+v2)>>1)+11]];
      *dst2++ = pixel[ytab[((y1+3*y2)>>2)+7]|utab[((u1+3*u2)>>2)+7]
                                            |vtab[((v1+3*v2)>>2)+7]];

      y1 = *py1++;
      y2 = *py2++;
      u1 = *pu1++ >> 1;
      v1 = *pv1++ >> 1;
      u2 = *pu2++ >> 1;
      v2 = *pv2++ >> 1;
      *dst++  = pixel[ytab[((3*y1+y2)>>2)+1]|utab[((u1+u2)>>1)+1]
                                            |vtab[((v1+v2)>>1)+1]];
      *dst2++ = pixel[ytab[((y1+3*y2)>>2)+13]|utab[((u1+3*u2)>>2)+13]
                                             |vtab[((v1+3*v2)>>2)+13]];

      y1 = *py1++;
      y2 = *py2++;
      *dst++  = pixel[ytab[((3*y1+y2)>>2)+9]|utab[((u1+u2)>>1)+9]
                                            |vtab[((v1+v2)>>1)+9]];
      *dst2++ = pixel[ytab[((y1+3*y2)>>2)+5]|utab[((u1+3*u2)>>2)+5]
                                            |vtab[((v1+3*v2)>>2)+5]];
    }

    /* skip the lines of the other field */
    py1 += coded_picture_width;
    py2 += coded_picture_width;
    pu1 += chrom_width;
    pv1 += chrom_width;
    /* in the second to last group the "next" chroma pointers step back
     * instead of forward, so the last group reads the same chroma line
     * through both pointer pairs
     */
    if (j!=(coded_picture_height-8))
    {
      pu2 += chrom_width;
      pv2 += chrom_width;
    }
    else
    {
      pu2 -= chrom_width;
      pv2 -= chrom_width;
    }
#ifdef _WIN32
    dst  = dst2 - 2*coded_picture_width;
    dst2 = dst  - coded_picture_width;
#else
    dst += coded_picture_width;
    dst2+= coded_picture_width;
#endif
  }
}

/*
 * Higher quality bottom field renderer (selected by init_dither() for 4:2:0
 * material when hiQdither is set).
 *
 * Writes a full-height 8 bit picture to dst.  The ..1 pointers address the
 * previous line of the field and the ..2 pointers the current one (both
 * start on the first field line).  Luma uses the weights 3:1 (dst) and 1:3
 * (dst2); the chroma weights depend on the row.
 *
 * Row layout: in the very first pass dst and dst2 both address display row
 * 0 (the dst2 value is stored last and wins).  From then on dst addresses
 * the odd rows 1,3,5,... and dst2 the even rows 2,4,6,...; the final odd row
 * is produced by the separate loop after the main loop.
 *
 * With _WIN32 the buffer is filled bottom-up.  After the first pass dst must
 * be repositioned there as well: unlike in the top-down layout, the position
 * "one past the row just written" is not the next display row.  (An earlier
 * version skipped this for j==0, which made the next pass write one row past
 * the end of the buffer and shifted all following rows by two.)
 */
static void ditherbot420(src,dst)
unsigned char *src[];
unsigned char *dst;
{
  int i,j;
  int y1,u1,v1,y2,u2,v2;
  unsigned char *py1,*pu1,*pv1,*py2,*pu2,*pv2,*dst2;

  py2 = py1 = src[0] + coded_picture_width;
  pu2 = pu1 = src[1] + chrom_width;
  pv2 = pv1 = src[2] + chrom_width;

#ifdef _WIN32
  dst += (coded_picture_height-1)*coded_picture_width;
#endif
  dst2 = dst;

  for (j=0; j<coded_picture_height; j+=4)
  {
    /* line j + 0, j + 1 */
    for (i=0; i<coded_picture_width; i+=4)
    {
      /* one chroma pair is fetched per two pixels */
      y1 = *py1++;
      y2 = *py2++;
      u1 = *pu1++ >> 1;
      v1 = *pv1++ >> 1;
      u2 = *pu2++ >> 1;
      v2 = *pv2++ >> 1;
      *dst++  = pixel[ytab[((3*y1+y2)>>2)+15]|utab[((3*u1+u2)>>2)+15]
                                             |vtab[((3*v1+v2)>>2)+15]];
      *dst2++ = pixel[ytab[((y1+3*y2)>>2)]|utab[((u1+u2)>>1)]
                                          |vtab[((v1+v2)>>1)]];

      y1 = *py1++;
      y2 = *py2++;
      *dst++  = pixel[ytab[((3*y1+y2)>>2)+7]|utab[((3*u1+u2)>>2)+7]
                                            |vtab[((3*v1+v2)>>2)+7]];
      *dst2++ = pixel[ytab[((y1+3*y2)>>2)+8]|utab[((u1+u2)>>1)+8]
                                            |vtab[((v1+v2)>>1)+8]];

      y1 = *py1++;
      y2 = *py2++;
      u1 = *pu1++ >> 1;
      v1 = *pv1++ >> 1;
      u2 = *pu2++ >> 1;
      v2 = *pv2++ >> 1;
      *dst++  = pixel[ytab[((3*y1+y2)>>2)+13]|utab[((3*u1+u2)>>2)+13]
                                             |vtab[((3*v1+v2)>>2)+13]];
      *dst2++ = pixel[ytab[((y1+3*y2)>>2)+2]|utab[((u1+u2)>>1)+2]
                                            |vtab[((v1+v2)>>1)+2]];

      y1 = *py1++;
      y2 = *py2++;
      *dst++  = pixel[ytab[((3*y1+y2)>>2)+5]|utab[((3*u1+u2)>>2)+5]
                                            |vtab[((3*v1+v2)>>2)+5]];
      *dst2++ = pixel[ytab[((y1+3*y2)>>2)+10]|utab[((u1+u2)>>1)+10]
                                             |vtab[((v1+v2)>>1)+10]];
    }

    /* after the first pass the "previous" pointer returns to the first
     * field line so that it trails py2 by one field line from now on
     */
    if (j!=0)
      py1 += coded_picture_width;
    else
      py1 -= coded_picture_width;

    py2 += coded_picture_width;

    /* the same two chroma lines are read again for lines j + 2, j + 3 */
    pu1 -= chrom_width;
    pv1 -= chrom_width;
    pu2 -= chrom_width;
    pv2 -= chrom_width;

#ifdef _WIN32
    /* bottom-up: the next dst row lies two rows before the position dst2
     * has reached, whether dst2 is level with dst (j==0) or one row ahead
     * of it (j!=0)
     */
    dst  = dst2 - 2*coded_picture_width;
    dst2 = dst  - coded_picture_width;
#else
    /* top-down: for j==0 dst already sits at the start of row 1 */
    if (j!=0)
      dst  += coded_picture_width;
    dst2 += coded_picture_width;
#endif

    /* line j + 2, j + 3 */
    for (i=0; i<coded_picture_width; i+=4)
    {
      y1 = *py1++;
      y2 = *py2++;
      u1 = *pu1++ >> 1;
      v1 = *pv1++ >> 1;
      u2 = *pu2++ >> 1;
      v2 = *pv2++ >> 1;
      *dst++  = pixel[ytab[((3*y1+y2)>>2)+12]|utab[((u1+3*u2)>>2)+12]
                                             |vtab[((v1+3*v2)>>2)+12]];
      *dst2++ = pixel[ytab[((y1+3*y2)>>2)+3]|utab[u2+3]
                                            |vtab[v2+3]];

      y1 = *py1++;
      y2 = *py2++;
      *dst++  = pixel[ytab[((3*y1+y2)>>2)+4]|utab[((u1+3*u2)>>2)+4]
                                            |vtab[((v1+3*v2)>>2)+4]];
      *dst2++ = pixel[ytab[((y1+3*y2)>>2)+11]|utab[u2+11]
                                             |vtab[v2+11]];

      y1 = *py1++;
      y2 = *py2++;
      u1 = *pu1++ >> 1;
      v1 = *pv1++ >> 1;
      u2 = *pu2++ >> 1;
      v2 = *pv2++ >> 1;
      *dst++  = pixel[ytab[((3*y1+y2)>>2)+14]|utab[((u1+3*u2)>>2)+14]
                                             |vtab[((v1+3*v2)>>2)+14]];
      *dst2++ = pixel[ytab[((y1+3*y2)>>2)+1]|utab[u2+1]
                                            |vtab[v2+1]];

      y1 = *py1++;
      y2 = *py2++;
      *dst++  = pixel[ytab[((3*y1+y2)>>2)+6]|utab[((u1+3*u2)>>2)+6]
                                            |vtab[((v1+3*v2)>>2)+6]];
      *dst2++ = pixel[ytab[((y1+3*y2)>>2)+9]|utab[u2+9]
                                            |vtab[v2+9]];
    }

    /* skip the luma lines of the other field */
    py1 += coded_picture_width;
    py2 += coded_picture_width;

    /* in the first group the "previous" chroma pointers return to the
     * first chroma line of the field instead of moving on
     */
    if (j!=0)
    {
      pu1 += chrom_width;
      pv1 += chrom_width;
    }
    else
    {
      pu1 -= chrom_width;
      pv1 -= chrom_width;
    }

    pu2 += chrom_width;
    pv2 += chrom_width;

#ifdef _WIN32
    dst  = dst2 - 2*coded_picture_width;
    dst2 = dst  - coded_picture_width;
#else
    dst += coded_picture_width;
    dst2+= coded_picture_width;
#endif
  }

  /* the "current" pointers have run past the field; step them back onto
   * the last line, which is where the "previous" pointers are by now
   */
  py2 -= (coded_picture_width<<1);
  pu2 -= (chrom_width<<1);
  pv2 -= (chrom_width<<1);

  /* dither last line */
  for (i=0; i<coded_picture_width; i+=4)
  {
    y1 = *py1++;
    y2 = *py2++;
    u1 = *pu1++ >> 1;
    v1 = *pv1++ >> 1;
    u2 = *pu2++ >> 1;
    v2 = *pv2++ >> 1;
    *dst++  = pixel[ytab[((3*y1+y2)>>2)+15]|utab[((3*u1+u2)>>2)+15]
                                           |vtab[((3*v1+v2)>>2)+15]];

    y1 = *py1++;
    y2 = *py2++;
    *dst++  = pixel[ytab[((3*y1+y2)>>2)+7]|utab[((3*u1+u2)>>2)+7]
                                          |vtab[((3*v1+v2)>>2)+7]];

    y1 = *py1++;
    y2 = *py2++;
    u1 = *pu1++ >> 1;
    v1 = *pv1++ >> 1;
    u2 = *pu2++ >> 1;
    v2 = *pv2++ >> 1;
    *dst++  = pixel[ytab[((3*y1+y2)>>2)+13]|utab[((3*u1+u2)>>2)+13]
                                           |vtab[((3*v1+v2)>>2)+13]];

    y1 = *py1++;
    y2 = *py2++;
    *dst++  = pixel[ytab[((3*y1+y2)>>2)+5]|utab[((3*u1+u2)>>2)+5]
                                          |vtab[((3*v1+v2)>>2)+5]];
  }
}

void init_dither(int bpp)
{
  int i, v;

  if ( bpp==8 )
  {
     dither_frame=ditherframe;
     if (chroma_format==CHROMA420 && hiQdither)
     {
        dither_even =dithertop420;
        dither_odd  =ditherbot420;
     }
     else
     {
        dither_even =dithertop;
        dither_odd  =ditherbot;
     }
  }
  else if ( bpp==24 )
  {
     dither_frame=ditherframeRGB;
     dither_even =dithertopRGB;
     dither_odd  =ditherbotRGB;
     init_ditherRGB();
  }
  else
     error("unsuported dither type");

  bpp/=8;
  if(!(dithered_image = (unsigned char *)malloc(bpp*coded_picture_width*
                                                coded_picture_height)))
    error("malloc failed");

  if(!(dithered_image2 = (unsigned char *)malloc(bpp*coded_picture_width*
                                                 coded_picture_height)))
    error("malloc failed");

  for (i=-8; i<256+8; i++)
  {
    v = i>>4;
    if (v<1)
      v = 1;
    else if (v>14)
      v = 14;
    ytab[i+8] = v<<4;
  }

  for (i=0; i<128+16; i++)
  {
    v = (i-40)>>4;
    if (v<0)
      v = 0;
    else if (v>3)
      v = 3;
    utab[i] = v<<2;
    vtab[i] = v;
  }

  for (i=0; i<256; i++)
     pixel[i]=i;
}

////////////////////////////////////////////////////////////////////////////
//
// RGB dithering routines
//
////////////////////////////////////////////////////////////////////////////

/* fixed-point arithmetic with BITS fractional bits */
#define BITS            8
#define ONE             ((int) 1)
#define CONST_SCALE     (ONE << BITS)
#define ROUND_FACTOR    (ONE << (BITS-1))

/* integer -> fixed point */
#define UP(x)           (((int)(x)) << BITS)
/* fixed point -> integer, rounding to nearest */
#define DOWN(x)         (((x) + ROUND_FACTOR) >> BITS)
/* non-negative floating point constant -> fixed point, rounded */
#define FIX(x)          ((int) ((x)*CONST_SCALE + 0.5))
/* limit x to the range ll..ul (arguments are evaluated more than once) */
#define CLAMP(ll,x,ul)  (((x)<(ll)) ? (ll) : (((x)>(ul)) ? (ul) : (x)))

/* fixed-point conversion terms, indexed by the 8 bit sample value and
 * filled by init_ditherRGB()
 */
static int Cr_r_tab[256];       /* Cr contribution to red   */
static int Cb_g_tab[256];       /* Cb contribution to green */
static int Cr_g_tab[256];       /* Cr contribution to green */
static int Cb_b_tab[256];       /* Cb contribution to blue  */
static int Cy_tab  [256];       /* scaled luma              */

//
// initialize tables used in YCbCr -> RGB conversion
//
/*
 * Fill the five lookup tables with the fixed-point products used by the
 * RGB renderers.  The chroma tables are indexed by the raw sample and hold
 * coefficient * (sample - 128); the luma table holds coefficient * sample.
 * NOTE(review): no offset of 16 is removed from luma before scaling by
 * 255/219 -- confirm that this is intended.
 */
static void init_ditherRGB()
{
   const int k_cr_r = FIX(1.40200);
   const int k_cb_g = FIX(0.34414);
   const int k_cr_g = FIX(0.71414);
   const int k_cb_b = FIX(1.77200);
   const int k_y    = FIX(255.0/219.0);
   int sample;
   int centred;

   for (sample=0; sample<256; ++sample)
   {
      /* chroma is signed around 128 */
      centred = sample - 128;

      Cr_r_tab[sample] =  k_cr_r * centred;
      Cb_g_tab[sample] = -k_cb_g * centred;
      Cr_g_tab[sample] = -k_cr_g * centred;
      Cb_b_tab[sample] =  k_cb_b * centred;
      Cy_tab  [sample] =  k_y * sample;
   }
}

/*
 * Convert a whole frame to 24 bit pixels in dithered_image.
 *
 * src[0] is read as luma, src[1] as Cb and src[2] as Cr.  The picture is
 * processed in blocks of 2x2 pixels; each pixel is stored as three bytes in
 * the order B, G, R.  In 4:2:0 all four pixels of a block share one chroma
 * pair, in the other formats chroma is re-read for the lower row, and in
 * 4:4:4 additionally for the right-hand column.  With _WIN32 the output is
 * written bottom-up.
 */
static void ditherframeRGB(src)
unsigned char *src[];
{
   unsigned char *lum=src[0];
   unsigned char *cb =src[1];
   unsigned char *cr =src[2];
   int            cols=coded_picture_width;
   int            rows=coded_picture_height;
   int            L, CR, CB;
   unsigned char *lum2, *r1, *r2;
   int            x, y;
   int            cr_r;
   int            cr_g;
   int            cb_g;
   int            cb_b;

   /* r1: upper output row of the pair, r2: lower output row */
#ifdef _WIN32
   r1 = dithered_image + (rows - 1) * cols * 3;
   r2 = r1 - cols * 3;
#else
   r1 = dithered_image;
   r2 = r1 + cols * 3;
#endif

   /* lum2 walks the luma line below lum */
   lum2 = lum + cols;
   for (y=0; y<rows; y+=2)
   {
      for (x=0; x<cols; x+=2)
      {
         int R, G, B;

         /* chroma terms for the upper left pixel */
         CR = *cr++;
         CB = *cb++;
         cr_r = Cr_r_tab[CR];
         cr_g = Cr_g_tab[CR];
         cb_g = Cb_g_tab[CB];
         cb_b = Cb_b_tab[CB];

         L = *lum++;
         L = Cy_tab[L];
         R = L + cr_r;
         G = L + cr_g + cb_g;
         B = L + cb_b;

         /* limit in fixed point, then drop the fraction bits */
         *r1++ = CLAMP(0,B,UP(255)) >> BITS;
         *r1++ = CLAMP(0,G,UP(255)) >> BITS;
         *r1++ = CLAMP(0,R,UP(255)) >> BITS;

         /* upper right pixel */
         L = *lum++;
         L = Cy_tab[L];
         if (chroma_format==CHROMA444)
         {
            /* read without advancing; the pointers move on further down */
            CR = *cr;
            CB = *cb;
            cr_r = Cr_r_tab[CR];
            cr_g = Cr_g_tab[CR];
            cb_g = Cb_g_tab[CB];
            cb_b = Cb_b_tab[CB];
         }
         R = L + cr_r;
         G = L + cr_g + cb_g;
         B = L + cb_b;

         *r1++ = CLAMP(0,B,UP(255)) >> BITS;
         *r1++ = CLAMP(0,G,UP(255)) >> BITS;
         *r1++ = CLAMP(0,R,UP(255)) >> BITS;

         /*
          * Now, do second row.
          */
         if (chroma_format!=CHROMA420)
         {
            /* chroma one line down, at the column of the left pixel
             * (cr/cb have already been advanced by one)
             */
            CR = cr[chrom_width-1];
            CB = cb[chrom_width-1];
            cr_r = Cr_r_tab[CR];
            cr_g = Cr_g_tab[CR];
            cb_g = Cb_g_tab[CB];
            cb_b = Cb_b_tab[CB];
         }
         L = *lum2++;
         L = Cy_tab[L];
         R = L + cr_r;
         G = L + cr_g + cb_g;
         B = L + cb_b;

         *r2++ = CLAMP(0,B,UP(255)) >> BITS;
         *r2++ = CLAMP(0,G,UP(255)) >> BITS;
         *r2++ = CLAMP(0,R,UP(255)) >> BITS;

         /* lower right pixel */
         L = *lum2++;
         L = Cy_tab[L];
         if (chroma_format==CHROMA444)
         {
            CR = cr[chrom_width];
            CB = cb[chrom_width];
            cr_r = Cr_r_tab[CR];
            cr_g = Cr_g_tab[CR];
            cb_g = Cb_g_tab[CB];
            cb_b = Cb_b_tab[CB];
            /* second chroma sample of the block consumed */
            cr++;
            cb++;
         }
         R = L + cr_r;
         G = L + cr_g + cb_g;
         B = L + cb_b;

         *r2++ = CLAMP(0,B,UP(255)) >> BITS;
         *r2++ = CLAMP(0,G,UP(255)) >> BITS;
         *r2++ = CLAMP(0,R,UP(255)) >> BITS;
      }
      /* both luma pointers skip the line the other one has just read */
      lum += cols;
      lum2 += cols;
      if (chroma_format!=CHROMA420)
      {
         /* skip the chroma line that was read through the row offset */
         cr += chrom_width;
         cb += chrom_width;
      }
#ifdef _WIN32
      /* back over the row just written and two rows further up */
      r1 -= 9 * cols;
      r2 -= 9 * cols;
#else
      r1 += 3 * cols;
      r2 += 3 * cols;
#endif
   }
}

/*
 * Convert the top field of src[] to a full-height 24 bit picture in dst.
 *
 * Every field line is converted unchanged into the upper row of an output
 * row pair; the lower row uses the average luma of that line and the next
 * line of the field (for the last field line: the line itself) together
 * with the same chroma.  Pixels are stored as B, G, R.  With _WIN32 the
 * output is written bottom-up.
 */
static void dithertopRGB(src,dst)
unsigned char *src[];
unsigned char *dst;
{
   unsigned char *yp      = src[0];
   unsigned char *cbp     = src[1];
   unsigned char *crp     = src[2];
   unsigned char *yp_next;
   unsigned char *row_a, *row_b;
   int            width   = coded_picture_width;
   int            height  = coded_picture_height;
   int            line, col, k;
   int            Y, L, R, G, B;
   int            CR, CB;
   int            cr_r = 0;
   int            cr_g = 0;
   int            cb_g = 0;
   int            cb_b = 0;
   int            step_a, step_b, step;

   /* chroma line step after the first / second field line of each group of
    * four picture lines: 4:2:0 reuses the chroma line for the second one
    */
   step_a = (chroma_format==CHROMA420) ? -chrom_width : chrom_width;
   step_b = chrom_width;

#ifdef _WIN32
   row_a = dst + (height - 1) * width * 3;
   row_b = row_a - width * 3;
#else
   row_a = dst;
   row_b = row_a + width * 3;
#endif

   /* next line of the same field */
   yp_next = yp + 2*width;

   for (line=0; line<height; line+=2)
   {
      for (col=0; col<width; col+=2)
      {
         for (k=0; k<2; k++)
         {
            /* new chroma for the left pixel, and for both in 4:4:4 */
            if (k==0 || chroma_format==CHROMA444)
            {
               CR = *crp++;
               CB = *cbp++;
               cr_r = Cr_r_tab[CR];
               cr_g = Cr_g_tab[CR];
               cb_g = Cb_g_tab[CB];
               cb_b = Cb_b_tab[CB];
            }

            Y = *yp++;

            /* the field line itself */
            L = Cy_tab[Y];
            R = L + cr_r;
            G = L + cr_g + cb_g;
            B = L + cb_b;

            *row_a++ = CLAMP(0,B,UP(255)) >> BITS;
            *row_a++ = CLAMP(0,G,UP(255)) >> BITS;
            *row_a++ = CLAMP(0,R,UP(255)) >> BITS;

            /* interpolated row below it */
            L = Cy_tab[(Y + *yp_next++)>>1];
            R = L + cr_r;
            G = L + cr_g + cb_g;
            B = L + cb_b;

            *row_b++ = CLAMP(0,B,UP(255)) >> BITS;
            *row_b++ = CLAMP(0,G,UP(255)) >> BITS;
            *row_b++ = CLAMP(0,R,UP(255)) >> BITS;
         }
      }

      /* skip the line of the other field */
      yp += width;

      /* before the last field line is processed, make "next" coincide
       * with it
       */
      if (line==(height-2))
         yp_next -= width;
      else
         yp_next += width;

      step = (line & 2) ? step_b : step_a;
      cbp += step;
      crp += step;

#ifdef _WIN32
      row_a -= 9 * width;
      row_b -= 9 * width;
#else
      row_a += 3 * width;
      row_b += 3 * width;
#endif
   }
}

/*
 * Convert the bottom field of src[] to a full-height 24 bit picture in dst.
 *
 * lum walks the current field line and lum2 the previous line of the field
 * (for the first field line: the line itself).  The upper row of every
 * output row pair (r1) gets the average luma of the two, the lower row (r2)
 * the current line unchanged; both use the chroma of the current line.
 * This is the same row layout as in the 8 bit ditherbot().  (An earlier
 * version stored the two rows the other way round, which put every
 * interpolated row below the line it should precede.)
 *
 * Pixels are stored as B, G, R.  With _WIN32 the output is written
 * bottom-up.
 */
static void ditherbotRGB(src,dst)
unsigned char *src[];
unsigned char *dst;
{
   unsigned char *lum=src[0];
   unsigned char *cb =src[1] + chrom_width;
   unsigned char *cr =src[2] + chrom_width;
   int            cols=coded_picture_width;
   int            rows=coded_picture_height;
   int            L, CR, CB;
   unsigned char *lum2, *r1, *r2;
   int            x, y, k;
   int            cr_r = 0;
   int            cr_g = 0;
   int            cb_g = 0;
   int            cb_b = 0;
   int            chrom_offset[3];

   /* chroma line step, indexed by (y & 2): 4:2:0 reuses the chroma line for
    * the second field line of every group of four picture lines
    */
   if (chroma_format==CHROMA420)
      chrom_offset[0] = -chrom_width;
   else
      chrom_offset[0] =  chrom_width;
   chrom_offset[1] =  0; /* never indexed */
   chrom_offset[2] =  chrom_width;

#ifdef _WIN32
   r1 = dst + (rows - 1) * cols * 3;
   r2 = r1 - cols * 3;
#else
   r1 = dst;
   r2 = r1 + cols * 3;
#endif

   /* first line of the bottom field */
   lum +=cols;
   lum2 =lum;
   for (y=0; y<rows; y+=2)
   {
      for (x=0; x<cols; x+=2)
      {
         for (k=0; k<2; k++)
         {
            int R, G, B, Y;

            /* new chroma for the left pixel, and for both in 4:4:4 */
            if (k==0 || chroma_format==CHROMA444)
            {
               CR = *cr++;
               CB = *cb++;
               cr_r = Cr_r_tab[CR];
               cr_g = Cr_g_tab[CR];
               cb_g = Cb_g_tab[CB];
               cb_b = Cb_b_tab[CB];
            }

            Y = *lum++;

            /* upper row: interpolated between previous and current line */
            L = (Y + *lum2++)>>1;
            L = Cy_tab[L];
            R = L + cr_r;
            G = L + cr_g + cb_g;
            B = L + cb_b;

            *r1++ = CLAMP(0,B,UP(255)) >> BITS;
            *r1++ = CLAMP(0,G,UP(255)) >> BITS;
            *r1++ = CLAMP(0,R,UP(255)) >> BITS;

            /* lower row: the field line itself */
            L = Cy_tab[Y];
            R = L + cr_r;
            G = L + cr_g + cb_g;
            B = L + cb_b;

            *r2++ = CLAMP(0,B,UP(255)) >> BITS;
            *r2++ = CLAMP(0,G,UP(255)) >> BITS;
            *r2++ = CLAMP(0,R,UP(255)) >> BITS;
         }
      }

      /* skip the line of the other field */
      lum += cols;

      /* after the first line lum2 returns to it, so that it trails lum by
       * one field line from now on
       */
      if (y!=0)
        lum2 += cols;
      else
        lum2 -= cols;

      cb += chrom_offset[y & 2];
      cr += chrom_offset[y & 2];
#ifdef _WIN32
      r1 -= 9 * cols;
      r2 -= 9 * cols;
#else
      r1 += 3 * cols;
      r2 += 3 * cols;
#endif
   }
}


//
// portable display function
//
/* Pass dithered_image2 to display_image().  For interlaced sequences
 * dither() renders the second field of a picture into that buffer.
 */
void display_second_field()
{
  display_image(dithered_image2);
}

#endif // DISPLAY

