/** Minicomun, a small programming language. This is a single-header C99
  implementation of minicomun, a small version of the comun language. This is a
  small, KISS implementation that interprets the source text directly and isn't
  very fast. This file can also be compiled as a standalone interpreter
  (#define MCM_STANDALONE 1). Compared to comun this minicomun only has one type
  environment (set with MCM_TYPE), doesn't support file includes, preprocessing,
  goto and only supports user pointers in a limited way (only predefined number
  of one letter pointers of size 1 which can only be used as variable). Due to
  simplicity some non-comun programs may actually be accepted by this minicomun
  implementation (pointers don't have to be defined).

  by drummyfish, 2022, released under CC0 1.0, public domain
  (https://creativecommons.org/publicdomain/zero/1.0/) */

#ifndef _MINICOMUN_H
#define _MINICOMUN_H

/* Compile-time configuration. Every macro below may be defined by the user
   before this file is included/compiled; otherwise the default is used. */

#ifndef MCM_STANDALONE
  /* Nonzero compiles in the standalone interpreter (main etc.) found at the
     end of this file. It is ON by default, so define it as 0 when this file is
     included into a program that has its own main. */
  #define MCM_STANDALONE 1 ///< includes implementation of standalone interpr.
#endif

#ifndef MCM_TYPE
  #define MCM_TYPE unsigned int ///< minicomun data type, has to be unsigned
#endif

#ifndef MCM_CELLS
  #define MCM_CELLS 512 ///< number of memory cells (size of MCM_memory)
#endif

#ifndef MCM_PARSE_STACK_SIZE
  #define MCM_PARSE_STACK_SIZE 128 ///< parse stack size, says maximum nesting
#endif

#ifndef MCM_CALL_STACK_SIZE
  #define MCM_CALL_STACK_SIZE 64 ///< call stack size, max. func. call nesting
#endif

#ifndef MCM_STANDALONE_MAX_SOURCE_SIZE
  #define MCM_STANDALONE_MAX_SOURCE_SIZE 4096 ///< buffer size for source code
#endif

#ifndef MCM_POINTERS
  #define MCM_POINTERS 8 ///< number of usable pointers (e.g. 3: a, b and c)
#endif

/* Status codes returned by MCM_interpret. */
#define MCM_OK                      0x00 ///< all OK
#define MCM_ERROR                   0x01 ///< generic error
#define MCM_ERROR_BAD_TOKEN         0x02 ///< error: syntactically wrong token
#define MCM_ERROR_BAD_OPERATION     0x03 ///< error: invalid op. (0 div etc.)
#define MCM_ERROR_UNEXPECTED_TOKEN  0x04 ///< error: unexpected token found
#define MCM_ERROR_UNEXPECTED_END    0x05 ///< error: unexpected end of source
#define MCM_ERROR_MEMORY_BOUND_LEFT 0x06 ///< error: stack left memory bounds
#define MCM_ERROR_CALL_STACK        0x07 ///< error: call stack overflow
#define MCM_ERROR_NESTING_TOO_DEEP  0x08 ///< error: parse stack overflow

/* Global interpreter state. MCM_interpret resets all of it at its start and
   leaves it in place on return, so the caller can inspect it afterwards (the
   standalone main prints MCM_sourcePos and MCM_stackTop after an error). */
MCM_TYPE MCM_memory[MCM_CELLS];      ///< memory cells, the stack lives here
unsigned int MCM_stackTop;           ///< index of the stack top in MCM_memory
unsigned int MCM_sourcePos;          ///< position of current token in source
MCM_TYPE MCM_pointers[MCM_POINTERS]; ///< values of user pointers (a, b, ...)

/** Interprets given minicomun source code. Any of the function parameters may
  be 0 in which case they're not used.

  @param code zero terminated source code text
  @param inputFunction called by the input operation (<-) to get one value, a
    negative return value is taken as end of input (0 is pushed from then on)
  @param outputFunction called with each value output by -> and -->
  @param stepFunction called after every token that was executed (it is not
    called for tokens that are only being skipped)
  @param externalCallFunction called when an identifier is used for which no
    function definition exists in the source; it gets a pointer to the name
    inside code (NOT zero terminated) and the length of the name
  @return MCM_OK on success, otherwise one of the MCM_ERROR_* values */
char MCM_interpret(const char *code, int (*inputFunction)(void),
  void (*outputFunction)(int), void (*stepFunction)(void),
  void (*externalCallFunction)(const char *,unsigned int));

//------------------------------------------------------------------------------
// privates:

// Advances MCM_sourcePos over blanks and comments to the first character of
// the next token and returns the length of that token (0 if the source ended).
unsigned int _MCM_nextToken(const char *code)
{
  unsigned char inComment = 0;

  // phase 1: skip everything in front of the token
  for (;; MCM_sourcePos++)
  {
    char c = code[MCM_sourcePos];

    if (c == 0)
      return 0; // source ended before any token started

    if (inComment)
    {
      if (c == '#' || c == '\n') // comment ends with # or with the line
        inComment = 0;
    }
    else if (c == '#')
      inComment = 1;
    else if (c > ' ')
      break; // first character of the token found
  }

  // phase 2: measure the token, its first character is already known
  unsigned char isString = code[MCM_sourcePos] == '"';
  unsigned int length = 1;

  for (;; length++)
  {
    char c = code[MCM_sourcePos + length];

    if (c == 0)
      return length; // token (even an unfinished string) ends with the source

    if (isString)
    {
      if (c == '"')
        return length + 1; // closing quote belongs to the token
    }
    else if (c <= ' ' || c == '#')
      return length; // blank or comment start terminates the token
  }
}

/* Finds the definition of a function in the source code, i.e. a token that
   consists of the function name immediately followed by ':'. The name is only
   looked for at token starts; text inside comments (#...# or # to end of line)
   and inside string literals ("...") is ignored, the same way the tokenizer
   treats it.

   code:     zero terminated source code
   funcName: name of the function, does NOT have to be zero terminated
   len:      length of the name

   Returns the position (index to code) of the definition token or -1 if there
   is none. */
int _MCM_findFunc(const char *code, const char *funcName, unsigned int len)
{
  unsigned char state = 0; // 0: between tokens, 1: comment, 2: token, 3: string
  int pos = 0;

  if (len == 0) // empty name can't be defined
    return -1;

  while (*code != 0) // read chars
  {
    switch (state)
    {
      case 0: // skipping blanks, a token may start here
        if (*code == '#')
          state = 1;
        else if (*code == '"')
          state = 3;
        else if (*code > ' ')
        {
          unsigned int matched = 0;

          while (matched < len && code[matched] != 0 &&
            code[matched] == funcName[matched])
            matched++;

          /* The name has to be followed by ':' and the token has to end right
             after it (blank, end of source or start of a comment). */
          if (matched == len && code[len] == ':' &&
            (code[len + 1] <= ' ' || code[len + 1] == '#'))
            return pos;

          state = 2; // some other token, skip the rest of it
        }

        break;

      case 1: // in comment
        if (*code == '#' || *code == '\n')
          state = 0;

        break;

      case 2: // inside a non-matching token
        if (*code <= ' ')
          state = 0;
        else if (*code == '#') // comment ends the token
          state = 1;

        break;

      case 3: // inside string literal
        if (*code == '"')
          state = 0;

        break;

      default: break;
    }

    code++;
    pos++;
  }

  return -1;
}

// Checks if token is identifier, returns 0 (no), 1 (yes), 2 (yes, func. def.).
// An identifier consists of letters, digits and underscores and doesn't start
// with a digit; a function definition is an identifier followed by ':'. Empty
// tokens and a lone ':' are not identifiers.
unsigned char _MCM_checkIdentifier(const char *string, int len)
{
  if (len <= 0) // nothing to check, also prevents reading string[-1] below
    return 0;

  if (*string >= '0' && *string <= '9')
    return 0;

  unsigned char ret = 1;

  if (string[len - 1] == ':')
  {
    len--;
    ret++;

    if (len == 0) // ':' alone, there is no name in front of it
      return 0;
  }

  while (len > 0)
  {
    if (*string != '_' &&
      (*string < '0' || *string > '9') &&
      (*string < 'a' || *string > 'z') &&
      (*string < 'A' || *string > 'Z'))
      return 0;

    string++;
    len--;
  }

  return ret;
}

// Tests whether a token is a valid numeric literal (returns 1) or not (returns
// 0). The literal is an optional sign (+ or -), an optional base prefix (d:
// decimal, x: hexadecimal, b: binary) and at least one digit of that base
// (hexadecimal digits a to f have to be lower case). For a valid literal its
// value is stored to *value (negative values wrap around); for an invalid one
// *value may have been overwritten as well.
unsigned char _MCM_checkNumericLiteral(const char *literal, int len,
  MCM_TYPE *value)
{
  unsigned char negative = 0;
  unsigned char radix = 10;

  if (*literal == '+' || *literal == '-')
  {
    negative = *literal == '-';
    literal++;
    len--;
  }

  switch (*literal)
  {
    case 'x': radix = 16; literal++; len--; break;
    case 'b': radix = 2;  literal++; len--; break;
    case 'd':             literal++; len--; break; // explicit decimal
    default: break;
  }

  if (len <= 0) // no digits present
    return 0;

  *value = 0;

  for (; len > 0; literal++, len--)
  {
    unsigned char digit = 200; // invalid in every base

    if (*literal >= '0' && *literal <= '9')
      digit = *literal - '0';
    else if (*literal >= 'a' && *literal <= 'f')
      digit = *literal - 'a' + 10;

    if (digit >= radix)
      return 0;

    *value = *value * radix + digit;
  }

  if (negative)
    *value = 0 - *value;

  return 1;
}

// Reinterprets an unsigned cell value as a signed number: values above half of
// the maximum MCM_TYPE value are mapped to negative numbers (x - max - 1).
long int _MCM_toSigned(MCM_TYPE x)
{
  MCM_TYPE allOnes = 0 - 1; // maximum value of the type
  long int value = x;

  if (value > allOnes / 2)
  {
    value -= allOnes;
    value -= 1;
  }

  return value;
}

// Converts a signed number back to a cell value (negative numbers wrap
// around).
MCM_TYPE _MCM_fromSigned(long int x)
{
  unsigned long wrapped = (unsigned long) x;

  return wrapped;
}

// Compares two cell values, returns 1 if the relation holds, otherwise 0. The
// relation is given by the flags: greater (> instead of <), equal (equality
// also satisfies the relation) and isSigned (values are compared as signed).
MCM_TYPE _MCM_compare(MCM_TYPE a, MCM_TYPE b, unsigned char greater,
  unsigned char equal, unsigned char isSigned)
{
  if (isSigned)
  {
    // adding half of the range to both values makes the unsigned comparison
    // below order them as signed numbers
    MCM_TYPE bias = 0 - 1;
    bias = bias / 2 + 1;

    a += bias;
    b += bias;
  }

  if (greater)
    return equal ? (a >= b) : (a > b);

  return equal ? (a <= b) : (a < b);
}

/* Interprets the zero terminated source text token by token. The global state
   (MCM_memory, MCM_stackTop, MCM_pointers, MCM_sourcePos) is reset first, then
   tokens are read with _MCM_nextToken until the source ends, an error happens
   or the program exits with !. outside of any function.

   Any of the callbacks may be 0, then it is not used: inputFunction supplies
   values for <- (negative value means end of input), outputFunction gets the
   values output by -> and -->, stepFunction is called after each executed (not
   skipped) token and externalCallFunction is called with the name (not zero
   terminated) and name length of a called function that has no definition.

   Returns MCM_OK or one of MCM_ERROR_* values, MCM_sourcePos then holds the
   position of the token at which interpretation stopped. */
char MCM_interpret(const char *code, int (*inputFunction)(void),
  void (*outputFunction)(int), void (*stepFunction)(void),
  void (*externalCallFunction)(const char *,unsigned int))
{
  unsigned int parseStack[MCM_PARSE_STACK_SIZE]; // source pos. of open blocks
  unsigned int parseStackTop = 0;
  unsigned int callStack[MCM_CALL_STACK_SIZE];   // return positions of calls
  unsigned int callStackTop = 0;
  unsigned char state = 0; /* 0: go, 1: skip to . or ;, 2: skip to .,
                              3: loop break, 4: func break */
  int bracketCount = 0;    // blocks opened since skipping started

  MCM_stackTop = MCM_CELLS - 1;

  while (MCM_stackTop > 0)
  {
    MCM_memory[MCM_stackTop] = 0;
    MCM_stackTop--;
  }

  MCM_stackTop++; // this pushes 0, meaning 0 program arguments received

  for (int i = 0; i < MCM_POINTERS; ++i)
    MCM_pointers[i] = 0;

  MCM_memory[0] = 0;
  MCM_sourcePos = 0;

  unsigned char eof = 0;

  while (1) // for each token
  {
    unsigned int tokenLen = _MCM_nextToken(code);

    if (tokenLen == 0)
      break;

#define C(i) code[MCM_sourcePos + (i)]
#define M(i) MCM_memory[MCM_stackTop - (i)]

/* Pushes the position of current token on the parse stack. The stack is
   checked right after every push, so there is always a free slot to write. */
#define PUSH_POS() do { parseStack[parseStackTop] = MCM_sourcePos;\
    parseStackTop++;\
    if (parseStackTop >= MCM_PARSE_STACK_SIZE)\
      return MCM_ERROR_NESTING_TOO_DEEP; } while (0)

    MCM_TYPE result = 0;
    unsigned char popN = 1;            // how many values the operation pops
    unsigned char doPop = 1;           // 0 if the token ends with '
    unsigned char normalOperation = 1; // pop popN values, then push result
    unsigned int nextPos = MCM_sourcePos + tokenLen;
    unsigned char identifier;

    if (C(tokenLen - 1) == '\'')
    {
      tokenLen--;
      doPop = 0;
    }

    if (state == 0) // not skipping?
    {
      if (tokenLen == 0) // lone ', there is no operation in front of it
        return MCM_ERROR_BAD_TOKEN;

      identifier = _MCM_checkIdentifier(code + MCM_sourcePos,tokenLen);

      if (identifier == 1) // func. call?
      {
        int funcPos = _MCM_findFunc(code,code + MCM_sourcePos,tokenLen);

        if (funcPos < 0)
        {
          if (externalCallFunction != 0)
            externalCallFunction(code + MCM_sourcePos,tokenLen);
        }
        else
        {
          PUSH_POS();

          callStack[callStackTop] = nextPos;
          callStackTop++;

          if (callStackTop >= MCM_CALL_STACK_SIZE)
            return MCM_ERROR_CALL_STACK;

          nextPos = funcPos + tokenLen + 1; // continue behind "name:"
        }

        normalOperation = 0;
      }
      else if (identifier == 2) // func. def?
      {
        PUSH_POS();
        state = 1; // definition isn't executed, skip its body
        bracketCount = 0;
        normalOperation = 0;
      }
      else if (C(0) == '"') // string
      {
        if (C(tokenLen - 1) != '"')
          return MCM_ERROR_BAD_TOKEN;

        // characters are pushed in reverse so that the first one ends on top
        for (int i = tokenLen - 2; i > 0; --i)
        {
          char c = code[MCM_sourcePos + i];

          if (c == '"')
            return MCM_ERROR_BAD_TOKEN;

          MCM_stackTop++;

          if (MCM_stackTop >= MCM_CELLS)
            return MCM_ERROR_MEMORY_BOUND_LEFT;

          M(0) = c;
        }

        normalOperation = 0;
      }
      else if (_MCM_checkNumericLiteral(code + MCM_sourcePos,tokenLen,&result))
      {
        popN = 0;
      }
      else if (tokenLen <= 2)
      {
#define CHARS(a,b) ((a << 8) | b)
        unsigned int chars = CHARS(C(0),(tokenLen == 2 ? C(1) : 0));

        switch (chars)
        {
          case CHARS('$',0):
          {
            unsigned int n = M(0);

            if (n > MCM_stackTop)
              return MCM_ERROR_MEMORY_BOUND_LEFT;

            result = M(n);
            break;
          }

          case CHARS('@',0):
            PUSH_POS();

            if (!M(0))
            {
              state = 1; // skip the loop
              bracketCount = 0;
            }

            if (doPop)
              MCM_stackTop--;

            normalOperation = 0;
            break;

          case CHARS('?',0):
            PUSH_POS();

            if (!M(0))
            {
              state = 1;
              bracketCount = 0; // skip the branch
            }

            if (doPop)
              MCM_stackTop--;

            normalOperation = 0;
            break;

          case CHARS(';',0):
            state = 1;
            bracketCount = 0; // skip the branch
            normalOperation = 0;
            break;

          case CHARS('.',0):
          {
            if (parseStackTop == 0)
              return MCM_ERROR_UNEXPECTED_TOKEN;

            parseStackTop--;

            char c = code[parseStack[parseStackTop]]; // what is being closed

            if (c == '@')
              nextPos = parseStack[parseStackTop]; // loop: back to its start
            else if (c != '?') // function return?
            {
              if (callStackTop == 0) // there is no call to return from
                return MCM_ERROR_UNEXPECTED_TOKEN;

              callStackTop--;
              nextPos = callStack[callStackTop];
            }

            normalOperation = 0;
            break;
          }

          case CHARS('^',0):
            if (!doPop)
              return MCM_ERROR_BAD_TOKEN; // not allowed with pop

            MCM_stackTop--;
            normalOperation = 0;
            break;

          case CHARS('$','$'):
            result = MCM_stackTop;
            popN = 0;
            break;

          case CHARS('!','!'):
            result = !M(0);
            popN = 1;
            break;

          case CHARS('!',0):
            result = ~M(0);
            popN = 1;
            break;

          case CHARS('/',0):
          case CHARS('%',0):
          case CHARS('/','/'):
          case CHARS('%','%'):
            if (M(0) == 0)
              return MCM_ERROR_BAD_OPERATION;

            if (MCM_stackTop < 1)
              return MCM_ERROR_MEMORY_BOUND_LEFT;

            // doubled character means the signed version of the operation
            result =
              C(1) == '/' ?
                _MCM_fromSigned(_MCM_toSigned(M(1)) / _MCM_toSigned(M(0))) :
              (
                C(1) == '%' ?
                  _MCM_fromSigned(_MCM_toSigned(M(1)) % _MCM_toSigned(M(0))) :
                (C(0) == '/' ? (M(1) / M(0)) : (M(1) % M(0)))
              );

            popN = 2;
            break;

          case CHARS('|','>'):
          case CHARS('|','<'):
            if (MCM_stackTop < 1) // two operands are needed
              return MCM_ERROR_MEMORY_BOUND_LEFT;

            // shifting by the type width or more would be undefined in C
            result = (M(0) >= (sizeof(MCM_TYPE) * 8)) ? 0 :
              (C(1) == '>' ? (M(1) >> M(0)) : (M(1) << M(0)));

            popN = 2;
            break;

#define OP2(c1,c2,op) case CHARS(c1,c2):\
    if (MCM_stackTop < 1) return MCM_ERROR_MEMORY_BOUND_LEFT;\
    result = M(1) op M(0); popN = 2; break;
          OP2('=',0,==)
          OP2('!','=',!=)
          OP2('*',0,*)
          OP2('|',0,|)
          OP2('&',0,&)
          OP2('+',0,+)
          OP2('-',0,-)
          OP2('&','&',&&)
          OP2('|','|',||)
          OP2('|','!',^)
#undef OP2

          case CHARS('<',0):
          case CHARS('>',0):
          case CHARS('<','='):
          case CHARS('>','='):
          case CHARS('>','>'):
          case CHARS('<','<'):
            if (MCM_stackTop < 1)
              return MCM_ERROR_MEMORY_BOUND_LEFT;

            popN = 2;
            result = _MCM_compare(M(1),M(0),C(0) == '>',C(1) == '=',
              C(1) == '<' || C(1) == '>');
            break;

          case CHARS('?','?'):
            if (MCM_stackTop < 2)
              return MCM_ERROR_MEMORY_BOUND_LEFT;

            popN = 3;
            result = M(2) ? M(1) : M(0);
            break;

          case CHARS('@','@'):
            PUSH_POS();
            normalOperation = 0;
            break;

          case CHARS('!','@'):
            state = 3;
            bracketCount = 0;
            normalOperation = 0;
            break;

          case CHARS('!','.'):
            if (callStackTop > 0)
              state = 4;
            else
              return MCM_OK; // outside of functions this ends the program

            normalOperation = 0;

            break;

          case CHARS('>','<'):
          {
            if (MCM_stackTop < 1)
              return MCM_ERROR_MEMORY_BOUND_LEFT;

            MCM_TYPE a = M(0), b = M(1);

            MCM_stackTop += 2 * (!doPop); // without pop a swapped copy is made

            if (MCM_stackTop >= MCM_CELLS)
              return MCM_ERROR_MEMORY_BOUND_LEFT;

            M(0) = b;
            M(1) = a;

            normalOperation = 0;
            break;
          }

          case CHARS('<','-'):
          {
            int v = (eof || inputFunction == 0) ? 0 : inputFunction();

            if (v < 0)
            {
              eof = 1;
              v = 0;
            }

            popN = 0;
            result = v;
            break;
          }

          case CHARS('<','?'):
            result = !eof;
            popN = 0;
            break;

          case CHARS('-','>'):
            if (outputFunction != 0)
              outputFunction(M(0));

            normalOperation = 0;
            MCM_stackTop -= doPop;
            break;

          case CHARS('+','+'): result = M(0) + 1; break;
          case CHARS('-','-'): result = M(0) - 1; break;

          default:
            if (C(0) == '$') // $N
            {
              popN = 0;

              if (C(1) >= '0' && C(1) <= '9')
              {
                unsigned char n = C(1) - '0';

                if (n > MCM_stackTop)
                  return MCM_ERROR_MEMORY_BOUND_LEFT;

                result = M(n);
              }
              else
              {
                unsigned char n = C(1) - 'a';

                if (n >= MCM_POINTERS) // valid indices are 0 to POINTERS - 1
                  return MCM_ERROR_BAD_TOKEN;

                result = MCM_pointers[n];
              }
            }
            else if (C(0) == '~' && C(1) >= 'a' && C(1) < 'a' + MCM_POINTERS)
              normalOperation = 0; // pointer definition, nothing to do
            else
              return MCM_ERROR_BAD_TOKEN;

            break;
        } // switch
#undef CHARS
      }
      else if (tokenLen == 3)
      {
        if (C(0) == '$')
        {
          if (C(1) == ':' && (C(2) >= 'a' && C(2) < 'a' + MCM_POINTERS))
          {
            normalOperation = 0;
            MCM_pointers[C(2) - 'a'] = M(0);

            if (doPop)
              MCM_stackTop--;
          }
          else
          {
            unsigned int n = C(2) - '0';

            if (n > 9)
              return MCM_ERROR_BAD_TOKEN;

            switch (C(1))
            {
              case ':': // $:N
              {
                if (n > MCM_stackTop)
                  return MCM_ERROR_MEMORY_BOUND_LEFT;

                M(n) = M(0);

                if (doPop)
                  MCM_stackTop--;

                break;
              }

              case '>': if (n == 0) MCM_stackTop++; break; // $>N
              case '<': if (n == 0) MCM_stackTop--; break; // $<N
              case '+': if (n == 0) MCM_stackTop += _MCM_toSigned(M(0)); break;
              default: break;
            }
          }

          normalOperation = 0;
        }
        else if (C(0) == '-' && C(1) == '-' && C(2) == '>')
        {
          if (!doPop) // not allowed with this operation
            return MCM_ERROR_BAD_TOKEN;

          while (1) // pop and output values until 0 is popped
          {
            if (MCM_stackTop == 0)
              return MCM_ERROR_MEMORY_BOUND_LEFT;

            MCM_stackTop--;

            if (MCM_memory[MCM_stackTop + 1] == 0)
              break;
            else if (outputFunction != 0)
              outputFunction(MCM_memory[MCM_stackTop + 1]);
          }

          normalOperation = 0;
        }
        else if (C(2) == '=' && C(0) == C(1) && (C(0) == '<' || C(1) == '>'))
        {
          if (MCM_stackTop < 1)
            return MCM_ERROR_MEMORY_BOUND_LEFT;

          result = _MCM_compare(M(1),M(0),C(0) == '>',1,1);
          popN = 2;
        }
        else if (C(0) == '|' && C(1) == '!' && C(2) == '!')
        {
          if (MCM_stackTop < 1)
            return MCM_ERROR_MEMORY_BOUND_LEFT;

          result = (M(0) == 0) != (M(1) == 0);
          popN = 2;
        }
        else
          return MCM_ERROR_BAD_TOKEN;
      }
      else if (tokenLen == 4)
      {
        normalOperation = 0;

        if (C(0) == '$' && C(2) == '>' && C(3) == '0' &&
          (C(1) >= '1' && C(1) <= '9'))
          MCM_stackTop -= (C(1) - '0');
        else if (!(C(0) == '~' && C(1) >= 'a' && C(1) < 'a' + MCM_POINTERS &&
          C(2) == ':' && C(3) == '1'))
          return MCM_ERROR_BAD_TOKEN;
      }
      else
        return MCM_ERROR_BAD_TOKEN;

      if (normalOperation)
      {
        MCM_stackTop = MCM_stackTop + 1 - (doPop ? popN : 0);

        if (MCM_stackTop < MCM_CELLS)
          M(0) = result;
      }

      /* MCM_stackTop is unsigned, so this catches both growing over the memory
         and (thanks to wrap around) dropping under 0. */
      if (MCM_stackTop >= MCM_CELLS)
        return MCM_ERROR_MEMORY_BOUND_LEFT;

      if (stepFunction != 0)
        stepFunction();
    }
    else if (state == 1 || state == 2) // skipping to next . or ;
    {
      if ((tokenLen == 1 && C(0) == '?') || ((tokenLen < 3) && C(0) == '@'))
        bracketCount++;
      else if (tokenLen == 1 && (C(0) == '.' || C(0) == ';'))
      {
        if (bracketCount == 0 && !(state == 2 && C(0) == ';'))
        {
          state = 0;

          if (C(0) != ';')
          {
            if (parseStackTop == 0) // there is no open block to be closed
              return MCM_ERROR_UNEXPECTED_TOKEN;

            parseStackTop--;
          }
        }
        else if (C(0) != ';')
          bracketCount--;
      }
    }
    else // state == 3 (loop break) or state == 4 (func break)
    {
      if ((tokenLen == 1 && C(0) == '?') || ((tokenLen < 3) && C(0) == '@'))
      {
        PUSH_POS();
        bracketCount++;
      }
      else if (tokenLen == 1 && C(0) == '.')
      {
        if (parseStackTop == 0)
          return MCM_ERROR_UNEXPECTED_TOKEN;

        parseStackTop--;

        char c = code[parseStack[parseStackTop]]; // what is being closed

        /* bracketCount counts only blocks opened after the break, it must not
           go under 0, otherwise the end of a loop that starts later inside
           the loop being left would be mistaken for the end of that loop. */
        if (bracketCount > 0)
          bracketCount--;
        else if (state == 3 && c == '@')
          state = 0; // end of the loop being left, go on behind it
        else if (state == 4 && c != '?' && c != '@')
        {
          if (callStackTop == 0) // there is no call to return from
            return MCM_ERROR_UNEXPECTED_TOKEN;

          state = 0;
          callStackTop--;
          nextPos = callStack[callStackTop];
        }
      }
    }

#undef PUSH_POS
#undef M
#undef C

    MCM_sourcePos = nextPos;
  } // for each token

  return parseStackTop == 0 ? MCM_OK : MCM_ERROR_UNEXPECTED_END;
}

#if MCM_STANDALONE
#include <stdio.h>
  
/* Source code buffer of the standalone interpreter: main reads the source file
   into it and passes it to MCM_interpret. */
char code[MCM_STANDALONE_MAX_SOURCE_SIZE];

/* Output callback of the standalone interpreter, writes the value as one
   character to the standard output. */
void output(int c)
{
  putc(c,stdout);
}

/* External call callback of the standalone interpreter: there are no external
   functions available, so a warning with the function name is printed to
   stderr and the call is ignored. fn is NOT zero terminated, len is its
   length. */
void externalCall(const char *fn, unsigned int len)
{
  // precision limits the output to the name itself, no copy is needed and
  // names of any length are shown whole
  fprintf(stderr,"WARNING: unknown function \"%.*s\", ignoring.\n",(int) len,
    fn);
}

/* Standalone interpreter: loads the source file given as the first argument
   and runs it, program input is taken from stdin and output goes to stdout.
   Errors are reported to stderr along with the position in the source.
   Returns 0 on success and 1 on any error. */
int main(int argc, char **argv)
{
  if (argc < 2)
  {
    fputs("ERROR: expected input file.\n",stderr);
    return 1;
  }

  FILE *f = fopen(argv[1],"r");

  if (!f)
  {
    fprintf(stderr,"ERROR: could not open the source file \"%s\"\n",argv[1]);
    return 1;
  }

  long fileSize = -1;

  if (fseek(f,0L,SEEK_END) == 0)
    fileSize = ftell(f);

  if (fileSize < 0 || fseek(f,0L,SEEK_SET) != 0)
  {
    fputs("ERROR: could not read the source file\n",stderr);
    fclose(f);
    return 1;
  }

  // one byte of the buffer has to stay free for the terminating zero
  if (fileSize >= MCM_STANDALONE_MAX_SOURCE_SIZE)
  {
    fputs("ERROR: source code too long\n",stderr);
    fclose(f);
    return 1;
  }

  // fewer characters than fileSize may arrive (e.g. newline translation)
  size_t charCount = fread(code,1,(size_t) fileSize,f);
  int readFailed = ferror(f);

  fclose(f);

  if (readFailed)
  {
    fputs("ERROR: could not read the source file\n",stderr);
    return 1;
  }

  code[charCount] = 0; // the interpreter expects zero terminated source

  unsigned char r = MCM_interpret(code,getchar,output,0,externalCall);

  switch (r)
  {
#define E(t,s) case t: fputs(s "\n",stderr); break;
    E(MCM_ERROR,"ERROR")
    E(MCM_ERROR_BAD_TOKEN,"ERROR: bad token")
    E(MCM_ERROR_UNEXPECTED_TOKEN,"ERROR: unexpected token")
    E(MCM_ERROR_BAD_OPERATION,"ERROR: bad operation")
    E(MCM_ERROR_UNEXPECTED_END,"ERROR: unexpected end")
    E(MCM_ERROR_MEMORY_BOUND_LEFT,"ERROR: memory bound left")
    E(MCM_ERROR_CALL_STACK,"ERROR: call stack overflow")
    E(MCM_ERROR_NESTING_TOO_DEEP,"ERROR: nesting too deep")
#undef E
    default: break;
  }

  if (r != MCM_OK)
  {
    unsigned int line = 1;

    // line number = 1 + number of newlines up to the error position
    for (unsigned int i = 0; i <= MCM_sourcePos; ++i)
      if (code[i] == '\n')
        line++;

    fprintf(stderr,"at position %u (line %u, stack position: %u):\n%.10s\n",
      MCM_sourcePos,line,MCM_stackTop,code + MCM_sourcePos);
  }

  return r == MCM_OK ? 0 : 1;
}

#endif // if MCM_STANDALONE
#endif // guard
