/* 
  rl - Select a random line from stdin or file.
  
  Copyright (C) 2001 Arthur de Jong
  
  This program is free software; you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
  the Free Software Foundation; either version 2, or (at your option)
  any later version.
  
  This program is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  GNU General Public License for more details.
  
  You should have received a copy of the GNU General Public License
  along with this program; if not, write to the Free Software Foundation,
  Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  
*/


#include <config.h>

#include <stdlib.h>
#include <stdio.h>
#include <sys/types.h>
#include <errno.h>
#include <string.h>

#ifdef HAVE_GETOPT_H
#include <getopt.h>
#else
#include <unistd.h>
#endif
#ifndef HAVE_GETOPT_LONG
#include "getopt_long.h"
#endif

#include "rl.h"


/* the maximum value for --count */
#define MAXCOUNT 1024


/* the average length of a line */
#define AVGLINELEN 50


/* the size in which to load files */
#define BLOCKSIZE 8192


/* the name of the program */
static char *program_name;


/* flag to indicate output */
int quiet=0;


/* Option flags and variables */
static struct option const long_options[] =
{
  {"count",required_argument,NULL,'c'},
  {"uniq",no_argument,NULL,'u'},
  {"quiet",no_argument,NULL,'q'},
  {"silent",no_argument,NULL,'q'},
  {"help",no_argument,NULL,'h'},
  {"version",no_argument,NULL,'V'},
  {NULL,0,NULL,0}
};
/* for adding options you should add to
    long_options[]  (directly above)
    OPTION_STRING   (directly below)
    display_usage() (below)
    main()          (for the handling of the option) */
#define OPTION_STRING "c:uqhV"


/* structure for holding a line of input (inlcuding newline) */
struct line
{
  char *line;                   /* pointer to allocated memory */
  int alloc;                    /* size of allocated memory */
  int len;                      /* amount of memory in use */
};


/* display usage information */
static void display_usage(FILE *fp)
{
  fprintf(fp,_("Usage: %s [OPTIONS]... [FILE]...\n"),program_name);
  fprintf(fp,_("Select a random line from stdin or a file.\n\n"));
  fprintf(fp,_("  -c, --count=N  output N lines (defualt is 1)\n"));
  fprintf(fp,_("  -u, --uniq     only one instance of each line is retured\n"));
  fprintf(fp,_("  -q, --quiet, --silent\n"
               "                 do not output any errors or warnings\n"));
  fprintf(fp,_("  -h, --help     display this help and exit\n"));
  fprintf(fp,_("  -V, --version  output version information and exit\n"));
}


/* display a use --help notice */
static void display_tryhelp(FILE *fp)
{
  fprintf(fp,_("Try `%s --help' for more information.\n"),program_name);
}


/* display version information */
static void display_version(FILE *fp)
{
  fprintf(fp,"rl %s\n",VERSION);
  fprintf(fp,_("Written by Arthur de Jong.\n\n"));
  fprintf(fp,_("Copyright (C) 2001 Arthur de Jong.\n"
               "This is free software; see the source for copying conditions.  There is NO\n"
               "warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n"));
}


/* intialize struct line and allocate memory for it */
void init_line(struct line *line,int alloc)
{
  line->alloc=alloc;
  line->line=xxmalloc(char,line->alloc);
  line->len=0;
}


/* change the size of the line */
void grow_line(struct line *line,int alloc)
{
  line->alloc=alloc;
  line->line=xxrealloc(line->line,char,line->alloc);
}


/* free allocated memory */
void free_line(struct line *line)
{
  if (line->line==NULL)
    return; /* not allocated so not freed */
  xfree(line->line);
  line->line=NULL;
  line->alloc=0;
}


/* read a single line from the file 
   this function returns a struct line structure containing
   the read line with newline or NULL on EOF */
struct line *getline(FILE *in,struct line *line)
{
  int c; /* the char read */
  struct line *orr;
  /* initialize new line if NULL */
  orr=line;
  if (line==NULL)
  {
    /* make a new line */
    line=xxmalloc(struct line,1);
    init_line(line,AVGLINELEN*2);
  }
  line->len=0;
  while ( ((c=fgetc(in))!=EOF) )
  {
    /* check allocated memory */
    if (line->alloc<(line->len+1))
      grow_line(line,line->alloc*2);
    /* store */
    line->line[line->len++]=c;
    /* check for '\n' */
    if (c=='\n')
      break; /* exit while */
  }
  /* check if anything was read */
  if (line->len==0)
  {
    /* free the line if it was created here */
    if (orr==NULL)
    {
      free_line(line);
      xfree(line);
    }
    return NULL;
  }
  /* the line was read */
  return line;
}


/* make a copy of the sc line into dst line
   optionaly growing of allocationg dst
   the orriginal content of dst is lost and a pointer
   to the result (dst or a new line if dst==NULL) */
struct line *copyline(struct line *dst,struct line *src)
{
  if (dst==NULL)
  {
    dst=xxmalloc(struct line,1);
    init_line(dst,src->len+AVGLINELEN/2);
  }
  else if (dst->alloc<src->len)
  {
    grow_line(dst,src->len+AVGLINELEN/2);
  }
  memcpy(dst->line,src->line,src->len);
  dst->len=src->len;
  return dst;
}


/* read the whole file and pick out count random lines and dump to stdout
   count and in are assumed to be reasonable values 
   (maybe a better functionname, my English is not that good )
   any line has an equal chance of getting picked
   and lines may be selected multiple times */
void rl_withreplace(FILE *in,int count)
{
  struct line **result;         /* array of pointers to selected lines */
  struct line *current;         /* currently read line */
  int line=0;                   /* the number of the current line */
  int i;                        /* counter for loops */
  /* initialize the result */
  result=xxmalloc(struct line *,count);
  for (i=0;i<count;i++)
    result[i]=NULL;
  /* read the lines one by one */
  current=NULL;
  while ((current=getline(in,current))!=NULL)
  {
    line++;
    for (i=0;i<count;i++)
    {
       /* NOTE: this may fail if there are a LOT of lines in the file */
       if (random_draw(1,line))
       {
         /* save a new line */
         result[i]=copyline(result[i],current);
       }
    }
  }
  /* dump the result (if any lines were read) */
  if (line>0)
  {
    for (i=0;i<count;i++)
      fwrite(result[i]->line,sizeof(char),result[i]->len,stdout);
  }
  /* free the memory! */
  if (current!=NULL)
  {
    free_line(current);
    xfree(current);
  }
  for (i=0;i<count;i++)
  {
    if (result[i]!=NULL)
    {
      free_line(result[i]);
      xfree(result[i]);
    }
  }
  xfree(result);
}


/* read the whole file and pick out count random lines and dump to stdout
   count and in are assumed to be reasonable values 
   (maybe a better functionname, my English is not that good )
   any line can only be picked once */
void rl_withoutreplace(FILE *in,int count)
{
  struct line **result;         /* array of pointers to selected lines */
  struct line *t=NULL;          /* temporary pointer for swapping lines */
  struct line *current;         /* currently read line */
  int line=0;                   /* the number of the current line */
  int i;                        /* counter for loops */
  int j;                        /* for swapping */
  /* initialize result */
  result=xxmalloc(struct line *,count);
  /* initialize some lines */
  while ( (line<count) && ((result[line]=getline(in,NULL))!=NULL) )
  {
    line++;
  }
  /* randomize lines */
  for (i=0;i<line;i++)
  {
    j=random_below(line);
    t=result[i];
    result[i]=result[j];
    result[j]=t;
  }
  t=NULL;
  /* check if the buffer is filled */
  if (line<count)
  {
    if (!quiet)
      fprintf(stderr,_("Warning: less than %d lines were read.\n"),count);
  }
  else
  {
    /* read the lines one by one */
    current=NULL;
    while ((current=getline(in,current))!=NULL)
    {
      line++;
      if (random_draw(count,line))
      {
        i=random_below(count);
        /* swap current with result[i] */
        t=result[i];
        result[i]=current;
        current=t;
      }
    }
  }
  /* dump the result (if any lines were read) */
  for (i=0;(i<line)&&(i<count);i++)
  {
    fwrite(result[i]->line,sizeof(char),result[i]->len,stdout);
  }
  /* free the memory! */
  if (t!=NULL) /* t accidentaly point to the last current */
  {
    free_line(t);
    xfree(t);
  }
  for (i=0;(i<line)&&(i<count);i++)
  {
    if (result[i]!=NULL)
    {
      free_line(result[i]);
      xfree(result[i]);
    }
  }
  xfree(result);
}


/* read file and randomize and output all lines 
   this is done in a way that any line will only be returned once */
void rl_randomizefile(FILE *in)
{
  struct line buffer;           /* for reading the file */
  int *result;                  /* array of pointers to selected lines */
  struct line *lines;           /* an array of all read lines */
  int alloc;                    /* allocated size of result */
  int count;                    /* number of lines in result */
  int i;                        /* multi purpose */
  int j;                        /* multi purpose */
  int t;                        /* for swapping */
  /* read the file */
  init_line(&buffer,BLOCKSIZE);
  for (i=0;(j=fread(buffer.line+i,sizeof(char),BLOCKSIZE,in))==BLOCKSIZE;)
  {
    /* grow buffer if needed */
    if (buffer.alloc<=i+j+BLOCKSIZE)
      grow_line(&buffer,buffer.alloc*2);
    i+=j;
  }
  buffer.len=i+j;
  /* initialize result */
  alloc=buffer.len/AVGLINELEN+5;
  result=xxmalloc(int,alloc);
  lines=xxmalloc(struct line,alloc);
  count=0;
  /* split in lines */
  for (i=0;i<buffer.len;)
  {
    /* check if realloc is needed */
    if (count>=alloc)
    {
      alloc=alloc*2; /* *4/3 */ /* only slightly grow */
      result=xxrealloc(result,int,alloc);
      lines=xxrealloc(lines,struct line,alloc);
    }
    /* find the end of the line */
    for (j=i;(j<buffer.len)&&(buffer.line[j]!='\n');j++);
    if (buffer.line[j]=='\n') j++;
    /* build the line */
    lines[count].line=buffer.line+i;
    lines[count].alloc=0;
    lines[count].len=j-i;
    result[count]=count;
    /* ready for the next line */
    i=j; /* continue from end of line */
    count++;
  }
  /* randomize lines */
  for (i=0;i<count;i++)
  {
    j=random_below(count);
            t=result[i];
    result[i]=result[j];
    result[j]=t;
  }
  /* dump the result */
  for (i=0;i<count;i++)
  {
    j=result[i];
    fwrite(lines[j].line,sizeof(char),lines[j].len,stdout);
  }
  /* free the memory! */
  xfree(result);
  xfree(lines);
  free_line(&buffer);
}


/* the main program */
int main(int argc,char **argv)
{
  int c;        /* option charaters */
  FILE *fp;     /* for reading command-line specified files */
  int count=-1;  /* the command-line parameter */
  char *endptr; /* used for command-line parsing */
  int uniq=0;   /* wether to return same lines or not */

  program_name=argv[0];
  
  /* parse command-line options */
  while ((c=getopt_long(argc,argv,OPTION_STRING,
                        long_options,(int *)NULL))!=-1)
  { 
    /* find out which option was specified */
    switch (c)
    {
    case 'V': /* -V, --version */
      display_version(stdout);
      exit(0);
    case 'h': /* -h, --help */
      display_usage(stdout);
      exit(0);
    case 'c': /* -c, --count=N */
      count=strtol(optarg,&endptr,0);
      if ( (optarg[0]=='\0') || (endptr[0]!='\0') || 
           (count<1) || (count>MAXCOUNT) )
      {
        if (!quiet)
        {
          fprintf(stderr,_("%s: invalid argument to count\n"),program_name);
          display_tryhelp(stderr);
        }
        exit(1);
      }
      break;
    case 'u': /* -u, --uniq */
      uniq=1;
      break;
    case 'q': /* -q, --quiet, --silent */
      quiet=1; /* true */
      opterr=0; /* disable error-reporting of getopt() */
      break;
    case ':': /* missing parameter of an option */
    case '?': /* unknown option */
    default:  /* undefined */
      if (!quiet)
        display_tryhelp(stderr);
      exit(1);
    }
  }

  /* intialize the random-number generator */
  randomize();
  
  /* rest of parameters are filenames */
  if (optind>=argc)
  {
    if (count<=0)
      rl_randomizefile(stdin);
    else if (uniq)
      rl_withoutreplace(stdin,count);
    else
      rl_withreplace(stdin,count);
  }
  else
  {
    for (;optind<argc;optind++)
    {
      if (strncmp("-",argv[optind],2)==0)
      {
       if (count<=0)
          rl_randomizefile(stdin);
       else if (uniq)
          rl_withoutreplace(stdin,count);
        else
          rl_withreplace(stdin,count);
      }
      else
      {
        if ((fp=fopen(argv[optind],"r"))==NULL)
        {
          if (!quiet)
            fprintf(stderr,_("Error opening %s: %s\n"),argv[optind],strerror(errno));
        }
        else
        {
          if (count<=0)
            rl_randomizefile(fp);
          else if (uniq)
            rl_withoutreplace(fp,count);
          else
            rl_withreplace(fp,count);
          fclose(fp);
        }
      }
    }
  }
    
  exit (0);
}
