/* tinyproxy - A fast light-weight HTTP proxy
 * Copyright (C) 2004 Robert James Kaes <rjkaes@users.sourceforge.net>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 */

/* Parses the configuration file and sets up the config_s structure for
 * use by the application.  This file replaces the old grammar.y and
 * scanner.l files.  It takes up less space and _I_ think is easier to
 * add new directives to.  Who knows if I'm right though.
 */

#include "tinyproxy.h"

#include "conffile.h"

#include "acl.h"
#include "anonymous.h"
#include "child.h"
#include "filter.h"
#include "heap.h"
#include "html-error.h"
#include "log.h"
#include "reqs.h"
#include "reverse-proxy.h"

/*
 * The configuration directives are defined in the structure below.  Each
 * directive requires a regular expression to match against, and a
 * function to call when the regex is matched.
 *
 * Below are defined certain constant regular expression strings that
 * can (and likely should) be used when building the regex for the
 * given directive.
 */
#define WS "[[:space:]]+"
#define STR "\"([^\"]+)\""
#define BOOL "(yes|on|no|off)"
#define INT "((0x)?[[:digit:]]+)"
#define ALNUM "([-a-z0-9._]+)"
#define IP "((([0-9]{1,3})\\.){3}[0-9]{1,3})"
#define IPMASK "(" IP "(/[[:digit:]]+)?)"
#define BEGIN "^[[:space:]]*"
#define END "[[:space:]]*$"

/*
 * Limit the maximum number of substring matches to a reasonably high
 * number.  Given the usual structure of the configuration file, sixteen
 * substring matches should be plenty.
 */
#define RE_MAX_MATCHES 16

/*
 * All configuration handling functions are REQUIRED to be defined
 * with the same function template as below.
 */
typedef int (*CONFFILE_HANDLER) (struct config_s *, const char *,
                                 regmatch_t[]);

/*
 * Define the pattern used by any directive handling function.  The
 * following arguments are defined:
 *
 *   struct config_s* conf   pointer to the current configuration structure
 *   const char* line          full line matched by the regular expression
 *   regmatch_t match[]        offsets to the substrings matched
 *
 * The handling function must return 0 if the directive was processed
 * properly.  Any errors are reported by returning a non-zero value.
 */
#define HANDLE_FUNC(func) \
  int func(struct config_s* conf, const char* line, \
	   regmatch_t match[])

/*
 * List all the handling functions.  These are defined later, but they need
 * to be in-scope before the big structure below.
 */
static
HANDLE_FUNC (handle_nop)
{
  return 0;
}                               /* do nothing function */

static HANDLE_FUNC (handle_allow);
static HANDLE_FUNC (handle_anonymous);
static HANDLE_FUNC (handle_bind);
static HANDLE_FUNC (handle_bindsame);
static HANDLE_FUNC (handle_connectport);
static HANDLE_FUNC (handle_defaulterrorfile);
static HANDLE_FUNC (handle_deny);
static HANDLE_FUNC (handle_errorfile);
#ifdef FILTER_ENABLE
static HANDLE_FUNC (handle_filter);
static HANDLE_FUNC (handle_filtercasesensitive);
static HANDLE_FUNC (handle_filterdefaultdeny);
static HANDLE_FUNC (handle_filterextended);
static HANDLE_FUNC (handle_filterurls);
#endif
static HANDLE_FUNC (handle_group);
static HANDLE_FUNC (handle_listen);
static HANDLE_FUNC (handle_logfile);
static HANDLE_FUNC (handle_loglevel);
static HANDLE_FUNC (handle_maxclients);
static HANDLE_FUNC (handle_maxrequestsperchild);
static HANDLE_FUNC (handle_maxspareservers);
static HANDLE_FUNC (handle_minspareservers);
static HANDLE_FUNC (handle_pidfile);
static HANDLE_FUNC (handle_port);
#ifdef REVERSE_SUPPORT
static HANDLE_FUNC (handle_reversebaseurl);
static HANDLE_FUNC (handle_reversemagic);
static HANDLE_FUNC (handle_reverseonly);
static HANDLE_FUNC (handle_reversepath);
#endif
static HANDLE_FUNC (handle_startservers);
static HANDLE_FUNC (handle_statfile);
static HANDLE_FUNC (handle_stathost);
static HANDLE_FUNC (handle_syslog);
static HANDLE_FUNC (handle_timeout);

static HANDLE_FUNC (handle_user);
static HANDLE_FUNC (handle_viaproxyname);
static HANDLE_FUNC (handle_xtinyproxy);

#ifdef UPSTREAM_SUPPORT
static HANDLE_FUNC (handle_upstream);
static HANDLE_FUNC (handle_upstream_no);
#endif

/*
 * This macro can be used to make standard directives in the form:
 *   directive arguments [arguments ...]
 *
 * The directive itself will be the first matched substring.
 *
 * Note that this macro is not required.  As you can see below, the
 * comment and blank line elements are defined explicitly since they
 * do not follow the pattern above.  This macro is for convenience
 * only.
 */
#define STDCONF(d, re, func) { BEGIN "(" d ")" WS re END, func, NULL }

/*
 * Holds the regular expression used to match the configuration directive,
 * the function pointer to the routine to handle the directive, and
 * for internal use, a pointer to the compiled regex so it only needs
 * to be compiled one.
 */
struct
{
  const char *re;
  CONFFILE_HANDLER handler;
  regex_t *cre;
} directives[] =
{
  /* comments */
  {
  BEGIN "#", handle_nop},
    /* blank lines */
  {
  "^[[:space:]]+$", handle_nop},
    /* string arguments */
    STDCONF ("logfile", STR, handle_logfile),
    STDCONF ("pidfile", STR, handle_pidfile),
    STDCONF ("anonymous", STR, handle_anonymous),
    STDCONF ("viaproxyname", STR, handle_viaproxyname),
    STDCONF ("defaulterrorfile", STR, handle_defaulterrorfile),
    STDCONF ("statfile", STR, handle_statfile),
    STDCONF ("stathost", STR, handle_stathost),
    STDCONF ("xtinyproxy", STR, handle_xtinyproxy),
    /* boolean arguments */
    STDCONF ("syslog", BOOL, handle_syslog),
    STDCONF ("bindsame", BOOL, handle_bindsame),
    /* integer arguments */
    STDCONF ("port", INT, handle_port),
    STDCONF ("maxclients", INT, handle_maxclients),
    STDCONF ("maxspareservers", INT, handle_maxspareservers),
    STDCONF ("minspareservers", INT, handle_minspareservers),
    STDCONF ("startservers", INT, handle_startservers),
    STDCONF ("maxrequestsperchild", INT, handle_maxrequestsperchild),
    STDCONF ("timeout", INT, handle_timeout),
    STDCONF ("connectport", INT, handle_connectport),
    /* alphanumeric arguments */
    STDCONF ("user", ALNUM, handle_user),
    STDCONF ("group", ALNUM, handle_group),
    /* ip arguments */
    STDCONF ("listen", IP, handle_listen),
    STDCONF ("allow", "(" IPMASK "|" ALNUM ")", handle_allow),
    STDCONF ("deny", "(" IPMASK "|" ALNUM ")", handle_deny),
    STDCONF ("bind", IP, handle_bind),
    /* error files */
    STDCONF ("errorfile", INT WS STR, handle_errorfile),
#ifdef FILTER_ENABLE
    /* filtering */
    STDCONF ("filter", STR, handle_filter),
    STDCONF ("filterurls", BOOL, handle_filterurls),
    STDCONF ("filterextended", BOOL, handle_filterextended),
    STDCONF ("filterdefaultdeny", BOOL, handle_filterdefaultdeny),
    STDCONF ("filtercasesensitive", BOOL, handle_filtercasesensitive),
#endif
#ifdef REVERSE_SUPPORT
    /* Reverse proxy arguments */
    STDCONF ("reversebaseurl", STR, handle_reversebaseurl),
    STDCONF ("reverseonly", BOOL, handle_reverseonly),
    STDCONF ("reversemagic", BOOL, handle_reversemagic),
    STDCONF ("reversepath", STR WS "(" STR ")?", handle_reversepath),
#endif
#ifdef UPSTREAM_SUPPORT
    /* upstream is rather complicated */
  {
  BEGIN "(no" WS "upstream)" WS STR END, handle_upstream_no},
  {
  BEGIN "(upstream)" WS "(" IP "|" ALNUM ")" ":" INT "(" WS STR ")?" END,
      handle_upstream},
#endif
    /* loglevel */
STDCONF ("loglevel", "(critical|error|warning|notice|connect|info)",
             handle_loglevel)};
const unsigned int ndirectives = sizeof (directives) / sizeof (directives[0]);

/*
 * Compiles the regular expressions used by the configuration file.  This
 * routine MUST be called before trying to parse the configuration file.
 *
 * Returns 0 on success; negative upon failure.
 */
int
config_compile (void)
{
  int i, r;

  for (i = 0; i != ndirectives; ++i)
    {
      assert (directives[i].handler);
      assert (!directives[i].cre);

      directives[i].cre = safemalloc (sizeof (regex_t));
      if (!directives[i].cre)
        return -1;

      r = regcomp (directives[i].cre,
                   directives[i].re, REG_EXTENDED | REG_ICASE | REG_NEWLINE);
      if (r)
        return r;
    }
  return 0;
}

/*
 * Attempt to match the supplied line with any of the configuration
 * regexes defined above.  If a match is found, call the handler
 * function to process the directive.
 *
 * Returns 0 if a match was found and successfully processed; otherwise,
 * a negative number is returned.
 */
static int
check_match (struct config_s *conf, const char *line)
{
  regmatch_t match[RE_MAX_MATCHES];
  unsigned int i;

  assert (ndirectives > 0);

  for (i = 0; i != ndirectives; ++i)
    {
      assert (directives[i].cre);
      if (!regexec (directives[i].cre, line, RE_MAX_MATCHES, match, 0))
        return (*directives[i].handler) (conf, line, match);
    }

  return -1;
}

/*
 * Parse the previously opened configuration stream.
 */
int
config_parse (struct config_s *conf, FILE * f)
{
  char buffer[1024];            /* 1KB lines should be plenty */
  unsigned long lineno = 1;

  while (fgets (buffer, sizeof (buffer), f))
    {
      if (check_match (conf, buffer))
        {
          printf ("Syntax error on line %ld\n", lineno);
          return 1;
        }
      ++lineno;
    }
  return 0;
}

/***********************************************************************
 *
 * The following are basic data extraction building blocks that can
 * be used to simplify the parsing of a directive.
 *
 ***********************************************************************/

static char *
get_string_arg (const char *line, regmatch_t * match)
{
  char *p;
  const unsigned int len = match->rm_eo - match->rm_so;

  assert (line);
  assert (len > 0);

  p = safemalloc (len + 1);
  if (!p)
    return NULL;

  memcpy (p, line + match->rm_so, len);
  p[len] = '\0';
  return p;
}

static int
set_string_arg (char **var, const char *line, regmatch_t * match)
{
  char *arg = get_string_arg (line, match);

  if (!arg)
    return -1;
  *var = safestrdup (arg);
  safefree (arg);
  return *var ? 0 : -1;
}

static int
get_bool_arg (const char *line, regmatch_t * match)
{
  const char *p = line + match->rm_so;

  assert (line);
  assert (match && match->rm_so != -1);

  /* "y"es or o"n" map as true, otherwise it's false. */
  if (tolower (p[0]) == 'y' || tolower (p[1]) == 'n')
    return 1;
  else
    return 0;
}

static int
set_bool_arg (unsigned int *var, const char *line, regmatch_t * match)
{
  assert (var);
  assert (line);
  assert (match && match->rm_so != -1);

  *var = get_bool_arg (line, match);
  return 0;
}

static inline long int
get_int_arg (const char *line, regmatch_t * match)
{
  assert (line);
  assert (match && match->rm_so != -1);

  return strtol (line + match->rm_so, NULL, 0);
}

static int
set_int_arg (int long *var, const char *line, regmatch_t * match)
{
  assert (var);
  assert (line);
  assert (match);

  *var = get_int_arg (line, match);
  return 0;
}

/***********************************************************************
 *
 * Below are all the directive handling functions.  You will notice
 * that most of the directives delegate to one of the basic data
 * extraction routines.  This is deliberate.  To add a new directive
 * to tinyproxy only requires you to define the regular expression
 * above and then figure out what data extract routine to use.
 *
 * However, you will also notice that more complicated directives are
 * possible.  You can make your directive as complicated as you require
 * to express a solution to the problem you're tackling.
 *
 * See the definition/comment about the HANDLE_FUNC() macro to learn
 * what arguments are supplied to the handler, and to determine what
 * values to return.
 *
 ***********************************************************************/

static
HANDLE_FUNC (handle_logfile)
{
  return set_string_arg (&conf->logf_name, line, &match[2]);
}

static
HANDLE_FUNC (handle_pidfile)
{
  return set_string_arg (&conf->pidpath, line, &match[2]);
}

static
HANDLE_FUNC (handle_anonymous)
{
  char *arg = get_string_arg (line, &match[2]);

  if (!arg)
    return -1;

  anonymous_insert (arg);
  safefree (arg);
  return 0;
}

static
HANDLE_FUNC (handle_viaproxyname)
{
  int r = set_string_arg (&conf->via_proxy_name, line, &match[2]);

  if (r)
    return r;
  log_message (LOG_INFO,
               "Setting \"Via\" header proxy to %s", conf->via_proxy_name);
  return 0;
}

static
HANDLE_FUNC (handle_defaulterrorfile)
{
  return set_string_arg (&conf->errorpage_undef, line, &match[2]);
}

static
HANDLE_FUNC (handle_statfile)
{
  return set_string_arg (&conf->statpage, line, &match[2]);
}

static
HANDLE_FUNC (handle_stathost)
{
  int r = set_string_arg (&conf->stathost, line, &match[2]);

  if (r)
    return r;
  log_message (LOG_INFO, "Stathost set to \"%s\"", conf->stathost);
  return 0;
}

static
HANDLE_FUNC (handle_xtinyproxy)
{
#ifdef XTINYPROXY_ENABLE
  return set_string_arg (&conf->my_domain, line, &match[2]);
#else
  fprintf (stderr,
           "XTinyproxy NOT Enabled! Recompile with --enable-xtinyproxy\n");
  return 1;
#endif
}

static
HANDLE_FUNC (handle_syslog)
{
#ifdef HAVE_SYSLOG_H
  return set_bool_arg (&conf->syslog, line, &match[2]);
#else
  fprintf (stderr, "Syslog support not compiled in executable.\n");
  return 1;
#endif
}

static
HANDLE_FUNC (handle_bindsame)
{
  int r = set_bool_arg (&conf->bindsame, line, &match[2]);

  if (r)
    return r;
  log_message (LOG_INFO, "Binding outgoing connection to incoming IP");
  return 0;
}

static
HANDLE_FUNC (handle_port)
{
  return set_int_arg ((long int *) &conf->port, line, &match[2]);
}

static
HANDLE_FUNC (handle_maxclients)
{
  child_configure (CHILD_MAXCLIENTS, get_int_arg (line, &match[2]));
  return 0;
}

static
HANDLE_FUNC (handle_maxspareservers)
{
  child_configure (CHILD_MAXSPARESERVERS, get_int_arg (line, &match[2]));
  return 0;
}

static
HANDLE_FUNC (handle_minspareservers)
{
  child_configure (CHILD_MINSPARESERVERS, get_int_arg (line, &match[2]));
  return 0;
}

static
HANDLE_FUNC (handle_startservers)
{
  child_configure (CHILD_STARTSERVERS, get_int_arg (line, &match[2]));
  return 0;
}

static
HANDLE_FUNC (handle_maxrequestsperchild)
{
  child_configure (CHILD_MAXREQUESTSPERCHILD, get_int_arg (line, &match[2]));
  return 0;
}

static
HANDLE_FUNC (handle_timeout)
{
  return set_int_arg ((long int *) &conf->idletimeout, line, &match[2]);
}

static
HANDLE_FUNC (handle_connectport)
{
  add_connect_port_allowed (get_int_arg (line, &match[2]));
  return 0;
}

static
HANDLE_FUNC (handle_user)
{
  return set_string_arg (&conf->user, line, &match[2]);
}

static
HANDLE_FUNC (handle_group)
{
  return set_string_arg (&conf->group, line, &match[2]);
}

static
HANDLE_FUNC (handle_allow)
{
  char *arg = get_string_arg (line, &match[2]);

  insert_acl (arg, ACL_ALLOW);
  safefree (arg);
  return 0;
}

static
HANDLE_FUNC (handle_deny)
{
  char *arg = get_string_arg (line, &match[2]);

  insert_acl (arg, ACL_DENY);
  safefree (arg);
  return 0;
}

static
HANDLE_FUNC (handle_bind)
{
#ifndef TRANSPARENT_PROXY
  int r = set_string_arg (&conf->bind_address, line, &match[2]);

  if (r)
    return r;
  log_message (LOG_INFO,
               "Outgoing connections bound to IP %s", conf->bind_address);
  return 0;
#else
  fprintf (stderr,
           "\"Bind\" cannot be used with transparent support enabled.\n");
  return 1;
#endif
}

static
HANDLE_FUNC (handle_listen)
{
  int r = set_string_arg (&conf->ipAddr, line, &match[2]);

  if (r)
    return r;
  log_message (LOG_INFO, "Listing on IP %s", conf->ipAddr);
  return 0;
}

static
HANDLE_FUNC (handle_errorfile)
{
  /*
   * Because an integer is defined as ((0x)?[[:digit:]]+) _two_
   * match places are used.  match[2] matches the full digit
   * string, while match[3] matches only the "0x" part if
   * present.  This is why the "string" is located at
   * match[4] (rather than the more intuitive match[3].
   */
  long int err = get_int_arg (line, &match[2]);
  char *page = get_string_arg (line, &match[4]);

  add_new_errorpage (page, err);
  safefree (page);
  return 0;
}

/*
 * Log level's strings.
 */
struct log_levels_s
{
  const char *string;
  int level;
};
static struct log_levels_s log_levels[] = {
  {"critical", LOG_CRIT},
  {"error", LOG_ERR},
  {"warning", LOG_WARNING},
  {"notice", LOG_NOTICE},
  {"connect", LOG_CONN},
  {"info", LOG_INFO}
};

static
HANDLE_FUNC (handle_loglevel)
{
  static const unsigned int nlevels =
    sizeof (log_levels) / sizeof (log_levels[0]);
  unsigned int i;

  char *arg = get_string_arg (line, &match[2]);

  for (i = 0; i != nlevels; ++i)
    {
      if (!strcasecmp (arg, log_levels[i].string))
        {
          set_log_level (log_levels[i].level);
          safefree (arg);
          return 0;
        }
    }

  safefree (arg);
  return -1;
}

#ifdef FILTER_ENABLE
static
HANDLE_FUNC (handle_filter)
{
  return set_string_arg (&conf->filter, line, &match[2]);
}

static
HANDLE_FUNC (handle_filterurls)
{
  return set_bool_arg (&conf->filter_url, line, &match[2]);
}

static
HANDLE_FUNC (handle_filterextended)
{
  return set_bool_arg (&conf->filter_extended, line, &match[2]);
}

static
HANDLE_FUNC (handle_filterdefaultdeny)
{
  assert (match[2].rm_so != -1);

  if (get_bool_arg (line, &match[2]))
    filter_set_default_policy (FILTER_DEFAULT_DENY);
  return 0;
}

static
HANDLE_FUNC (handle_filtercasesensitive)
{
  return set_bool_arg (&conf->filter_casesensitive, line, &match[2]);
}
#endif

#ifdef REVERSE_SUPPORT
static
HANDLE_FUNC (handle_reverseonly)
{
  return set_bool_arg (&conf->reverseonly, line, &match[2]);
}

static
HANDLE_FUNC (handle_reversemagic)
{
  return set_bool_arg (&conf->reversemagic, line, &match[2]);
}

static
HANDLE_FUNC (handle_reversebaseurl)
{
  return set_string_arg (&conf->reversebaseurl, line, &match[2]);
}

static
HANDLE_FUNC (handle_reversepath)
{
  /*
   * The second string argument is optional.
   */
  char *arg1, *arg2;

  arg1 = get_string_arg (line, &match[2]);
  if (!arg1)
    return -1;

  if (match[3].rm_so != -1)
    {
      arg2 = get_string_arg (line, &match[3]);
      if (!arg2)
        {
          safefree (arg1);
          return -1;
        }
      reversepath_add (arg1, arg2);
      safefree (arg1);
      safefree (arg2);
    }
  else
    {
      reversepath_add (NULL, arg1);
      safefree (arg1);
    }
  return 0;
}
#endif

#ifdef UPSTREAM_SUPPORT
static
HANDLE_FUNC (handle_upstream)
{
  char *ip;
  int port;
  char *domain;

  ip = get_string_arg (line, &match[2]);
  if (!ip)
    return -1;
  port = get_int_arg (line, &match[7]);

  if (match[9].rm_so != -1)
    {
      domain = get_string_arg (line, &match[9]);
      if (domain)
        {
          upstream_add (ip, port, domain);
          safefree (domain);
        }
    }
  else
    {
      upstream_add (ip, port, NULL);
    }

  safefree (ip);

  return 0;
}

static
HANDLE_FUNC (handle_upstream_no)
{
  char *domain;

  domain = get_string_arg (line, &match[2]);
  if (!domain)
    return -1;

  upstream_add (NULL, 0, domain);
  safefree (domain);

  return 0;
}
#endif