/* Copyright (C) 2001 Michael Leonhard
 * Mike Leonhard
 * mike at tamale dot net
 * http://tamale.net/
 */

#include <assert.h>
#include <limits.h>
#include <stdlib.h>
#include <string.h>
#include "ascorbic.h"

/* Forward declarations for the tokenizer routines defined further down in
 * this file, so they can call each other regardless of definition order.
 * Every char * returning routine takes the position of the token's first
 * character and returns a position inside the same source text. */
char *Tokenize_Endline( struct Ascorbic *asc, char *start );
char *Tokenize_Floatingpoint( struct Ascorbic *asc, char *start );
char *Tokenize_Identifier( struct Ascorbic *asc, char *start );
char *Tokenize_Integer( struct Ascorbic *asc, char *start );
int Tokenize_Source( struct Ascorbic *asc );
char *Tokenize_String( struct Ascorbic *asc, char *start );
char *Tokenize_Symbol( struct Ascorbic *asc, char *start );

/* True when c lies in the inclusive range [s, e].  Every argument is
 * parenthesized so that operands containing lower-precedence operators
 * (for example `x & 0xF`) expand as intended.  NOTE: c is evaluated twice,
 * so do not pass an expression with side effects. */
#define inrange( c, s, e ) ((c) >= (s) && (c) <= (e))

/* Skip a block comment, including any block comments nested inside it.
 *
 * asc    tokenizer state, only used for error reporting
 * start  position of the opening slash-star
 *
 * Returns the position just past the matching star-slash, or NULL when the
 * end of the text is reached first.  In that case "unterminated comment" is
 * reported once, at the opening of the innermost comment that was never
 * closed.
 */
char *Tokenize_BlockComment( struct Ascorbic *asc, char *start ) {
	char *here;
	
	assert( start );
	assert( start[0] == '/' );
	assert( start[1] == '*' );
	
	/* skip over opening */
	here = start + 2;

	/* walk through text */
	while( here[0] != 0 ) {
		/* ending of block */
		if( here[0] == '*' && here[1] == '/' ) return here + 2;
		
		/* nested block */
		if( here[0] == '/' && here[1] == '*' ) {
			here = Tokenize_BlockComment( asc, here );
			
			/* the nested comment never closed; the recursive call has
			 * already reported it, so just propagate the failure instead
			 * of reading through a NULL pointer */
			if( here == NULL ) return NULL;
			}
		
		/* next character */
		else here++;
		}
	
	Ascorbic_ErrorHere( asc, start, "unterminated comment" );
	return NULL;
	}

/* Skip a line comment.
 *
 * start  position of the opening double slash
 *
 * Returns the position of the newline that ends the comment, or of the
 * terminating NUL when the comment runs to the end of the text.  The
 * newline itself is not consumed.
 */
char *Tokenize_LineComment( char *start ) {

	assert( start );
	assert( start[0] == '/' );
	assert( start[1] == '/' );
	
	/* length of the run that contains no newline; stops at NUL as well */
	return start + strcspn( start, "\n" );
	}

/* Consume one newline and record it as an endline token.
 *
 * asc    tokenizer state; asc->list receives the token
 * start  position of the newline character
 *
 * When the most recent token in asc->list is already an endline no new
 * token is added, so a run of newlines yields a single endline token.
 * Returns the position just past the newline.
 */
char *Tokenize_Endline( struct Ascorbic *asc, char *start ) {
	struct Particle *token;
	char *next;
	int offset;
	
	assert( asc );
	assert( asc->list );
	assert( start );
	assert( *start == '\n' );
	
	/* offset of the newline within the source text */
	offset = start - asc->source;
	next = start + 1;
	
	/* only emit when the list is empty or its last token is not an endline */
	if( asc->list->childnum <= 0 || asc->list->child[ asc->list->childnum - 1 ]->type != endline ) {
		token = Particle_New( endline, offset );
		Particle_Add( asc->list, token );
		}
	
	return next;
	}

/* Read a floating point literal of the form digits '.' digits, where either
 * digit run (but not both) may be empty, and append a floatingpoint token.
 *
 * asc    tokenizer state; asc->list receives the token
 * start  position of the first digit, or of the dot when there are no
 *        leading digits
 *
 * The float value is stored in the leading bytes of the token's data
 * pointer slot; the remaining bytes of the slot are zero.
 * Returns the position just past the literal, or NULL (after reporting an
 * error) when the literal is a lone dot.
 */
char *Tokenize_Floatingpoint( struct Ascorbic *asc, char *start ) {
	struct Particle *particle;
	char *here;
	float value, base;
	
	assert( asc );
	assert( start );
	assert( *start == '.' || inrange( *start, '0', '9' ) );

	/* traverse integer portion */
	here = start;
	value = 0;
	while( inrange( *here, '0', '9' ) ) {
		/* shift digits */
		value *= 10;
		
		/* new digit goes in ones column */
		value += (*here - '0');

		/* next digit */
		here++;
		}

	/* skip dot */
	assert( *here == '.' );
	here++;
	
	/* traverse non integer portion */
	base = 1;
	while( inrange( *here, '0', '9' ) ) {
		/* shift digits */
		base /= 10;
		
		/* new digit goes in smallest column */
		value += base * (*here - '0');
		
		/* next digit */
		here++;
		}
	
	/* no digits before or after dot */
	if( here == start + 1 ) {
		Ascorbic_ErrorHere( asc, start, "token error, no digits for floating point number" );
		return NULL;
		}
	
	/* make the particle */
	particle = Particle_New( floatingpoint, start - asc->source );
	
	/* keep float value: copy its bytes into the zeroed pointer slot.
	 * Reading the float through a void ** would break the aliasing rules
	 * and, where pointers are wider than float, read past the variable. */
	assert( sizeof value <= sizeof particle->data );
	particle->data = NULL;
	memcpy( &particle->data, &value, sizeof value );
	
	/* add the particle */
	Particle_Add( asc->list, particle );
	
	return here;
	}

/* True when the len characters at span spell exactly the NUL-terminated
 * word. */
static int Tokenize_MatchesKeyword( const char *span, size_t len, const char *word ) {
	return strlen( word ) == len && memcmp( span, word, len ) == 0;
	}

/* Read an identifier or keyword (a letter followed by letters and digits)
 * and append the matching token.
 *
 * asc    tokenizer state; asc->list receives the token
 * start  position of the first letter
 *
 * The words sub, return, int, float and string produce their keyword token
 * types.  Anything else produces an identifier token whose data is a
 * heap-allocated, NUL-terminated copy of the name.
 * Returns the position just past the word, or NULL (after reporting an
 * error) when the copy of the name cannot be allocated.
 */
char *Tokenize_Identifier( struct Ascorbic *asc, char *start ) {
	struct Particle *particle;
	char *here, *text;
	size_t len;
	int type;
	
	assert( asc );
	assert( start );
	assert( inrange( *start, 'A', 'Z' ) || inrange( *start, 'a', 'z' ) );
	
	/* find end of identifier */
	here = start + 1;
	while( inrange( *here, 'A', 'Z' ) || inrange( *here, 'a', 'z' ) || inrange( *here, '0', '9' ) ) here++;
	
	/* length of identifier */
	len = (size_t)( here - start );
	
	/* keywords, compared in place so no temporary copy is needed */
	if( Tokenize_MatchesKeyword( start, len, "sub" ) ) type = subkeyword;
	else if( Tokenize_MatchesKeyword( start, len, "return" ) ) type = returnkeyword;
	else if( Tokenize_MatchesKeyword( start, len, "int" ) ) type = intkeyword;
	else if( Tokenize_MatchesKeyword( start, len, "float" ) ) type = floatkeyword;
	else if( Tokenize_MatchesKeyword( start, len, "string" ) ) type = stringkeyword;
	else type = identifier;
	
	/* only identifiers keep their text */
	text = NULL;
	if( type == identifier ) {
		text = (char *)malloc( len + 1 );
		
		/* checked at run time: assert() vanishes when NDEBUG is defined */
		if( text == NULL ) {
			Ascorbic_ErrorHere( asc, start, "out of memory" );
			return NULL;
			}
		memcpy( text, start, len );
		text[len] = 0;
		}
	
	/* make the particle */
	assert( type > -1 );
	particle = Particle_New( type, start - asc->source );
	
	/* identifier text */
	if( text != NULL ) particle->data = text;
	
	/* add the particle */
	Particle_Add( asc->list, particle );
	
	return here;
	}

char *Tokenize_Integer( struct Ascorbic *asc, char *start ) {
	struct Particle *particle;
	char *here = start;
	int value = 0, prev = 0, overflow = 0;
	
	assert( asc );
	assert( start );
	
	/* hexadecimal */
	if( start[0] == '0' && start[1] == 'x' ) {
		/* skip 0x */
		here += 2;
		
		/* traverse integer */
		while( inrange( *here, '0', '9' ) || inrange( *here, 'A', 'F' ) || inrange( *here, 'a', 'f' ) ) {
			/* check for overflow */
			if( value & 0xF0000000 ) overflow = 1;

			/* shift digits */
			value <<= 4;
			
			/* new digit */
			if( inrange( *here, '0', '9' ) ) value += *here - '0';
			else if( inrange( *here, 'A', 'F' ) ) value += *here - 'A' + 10;
			else if( inrange( *here, 'a', 'f' ) ) value += *here - 'a' + 10;
			else assert( 0 );
			
			/* next digit */
			here++;
			}
		}
	
	/* octal /
	else if( start[0] == '0' ) {
		/* traverse integer /
		here = start;
		while( inrange( *here, '0', '7' ) ) {
			/* check for overflow /
			if( value & 0xE0000000 ) overflow = 1;

			/* shift digits /
			value <<= 3;
			
			/* new digit /
			value += *here - '0';
			
			/* next digit /
			here++;
			}
		}/**/
	
	/* decimal */
	else if( inrange( *start, '0', '9' ) ) {
		/* traverse integer */
		here = start;
		while( inrange( *here, '0', '9' ) ) {
			/* shift digits */
			value *= 10;
			
			/* new digit goes in ones column */
			value += *here - '0';
			
			/* check for integer overflow */
			if( (value - *here + '0')/10 != prev || value < prev ) overflow = 1;
			prev = value;
			
			/* next digit */
			here++;
			}
		
		/* number is first portion of floatingpoint */
		if( *here == '.' ) return Tokenize_Floatingpoint( asc, start );
		}
	
	/* integer overflowed */
	if( overflow ) {
		Ascorbic_ErrorHere( asc, start, "token error, integer overflow" );
		return NULL;
		}

	/* make the particle */
	particle = Particle_New( integer, start - asc->source );
	
	/* keep integer value */
	particle->data = (void *)value;
	
	/* add the particle */
	Particle_Add( asc->list, particle );
	
	return here;
	}

int Tokenize_Source( struct Ascorbic *asc ) {
	struct Particle *p;
	char *here;
	
	assert( asc );
	assert( asc->source );
	
	/* allocate list */
	asc->list = Particle_New( tokenlist, 0 );
	assert( asc->list );
	
	/* traverse entire source */
	here = asc->source;
	while( *here != 0 ) {
		
		/* whitespace */
		if( *here == ' ' || *here == '\t' || *here == '\r' ) here++;
		/* line comment */
		else if( here[0] == '/' && here[1] == '/' ) here = Tokenize_LineComment( here );
		/* block comment */
		else if( here[0] == '/' && here[1] == '*' ) here = Tokenize_BlockComment( asc, here );
		/* endline */
		else if( *here == '\n' ) here = Tokenize_Endline( asc, here );
		/* identifier or keyword */
		else if( inrange( *here, 'A', 'Z' ) || inrange( *here, 'a', 'z' ) ) here = Tokenize_Identifier( asc, here );
		/* integer */
		else if( inrange( *here, '0', '9' ) ) here = Tokenize_Integer( asc, here );
		/* floating point */
		else if( *here == '.' ) here = Tokenize_Floatingpoint( asc, here );
		/* string */
		else if( *here == '"' ) here = Tokenize_String( asc, here );
		/* symbol */
		else if( strchr( ":;()|^<>=-+*/", *here ) != NULL ) here = Tokenize_Symbol( asc, here );
		/* token error */
		else return Ascorbic_ErrorHere( asc, here, "token error" );
		
		/* tokenize failure */
		if( here == NULL ) return -1;
		}

	return 1;
	}

/* Read a double-quoted string literal and append a string token.
 *
 * asc    tokenizer state; asc->list receives the token
 * start  position of the opening quote
 *
 * A backslash makes the following character part of the string, so an
 * escaped quote does not end the literal.  No unescaping is performed: the
 * token's data is a heap-allocated, NUL-terminated copy of the characters
 * between the quotes exactly as written, backslashes included.
 * Returns the position just past the closing quote, or NULL (after
 * reporting an error) when the string is unterminated or the copy cannot be
 * allocated.
 */
char *Tokenize_String( struct Ascorbic *asc, char *start ) {
	struct Particle *particle;
	char *here, *body, *text;
	int len, escape, place;
	
	assert( asc );
	assert( start );
	assert( *start == '"' );
	
	/* place */
	place = start - asc->source;
	
	/* skip leading quote */
	body = start + 1;
	
	/* find closing quote */
	escape = 0;
	for( here = body; *here; here++ ) {
		/* unescaped closing quote */
		if( escape == 0 && *here == '"' ) break;
		
		/* a backslash escapes the next character only */
		escape = ( escape == 0 && *here == '\\' );
		}
	
	/* reached end of source */
	if( *here == 0 ) {
		Ascorbic_ErrorHere( asc, start, "token error, unterminated string" );
		return NULL;
		}
	
	/* length of text */
	len = here - body;
	
	/* copy text; checked at run time because assert() vanishes when
	 * NDEBUG is defined */
	text = (char *)malloc( len + 1 );
	if( text == NULL ) {
		Ascorbic_ErrorHere( asc, start, "out of memory" );
		return NULL;
		}
	memcpy( text, body, len );
	text[len] = 0;
	
	/* make the particle */
	particle = Particle_New( string, place );
	
	/* text */
	particle->data = text;
	
	/* add the particle */
	Particle_Add( asc->list, particle );
	
	/* skip closing quote */
	return here + 1;
	}

char *Tokenize_Symbol( struct Ascorbic *asc, char *start ) {
	int type;
	char *here;
	
	assert( asc );
	assert( start );
	
	here = start;
	here++;
	
	switch( *start ) {
		case ':':
			if( *here == '=' ) {
				type = assignsymbol;
				here++;
				}
			else type = colonsymbol;
			break;
		case ';': type = semicolonsymbol; break;
		case '(': type = leftparensymbol; break;
		case ')': type = rightparensymbol; break;

		case '|': type = orsymbol; break;
		case '^': type = xorsymbol; break;
		case '<':
			if( *here == '<' ) {
				type = lshsymbol;
				here++;
				}
			else type = ltsymbol;
			break;
		case '>':
			if( *here == '>' ) {
				type = rshsymbol;
				here++;
				}
			else type = gtsymbol;
			break;
		case '=': type = equalsymbol; break;
		case '-': type = subtractsymbol; break;
		case '+': type = addsymbol; break;
		case '*': type = multiplysymbol; break;
		case '/': type = dividesymbol; break;
		default: assert( 0 );
		}
	
	/* make and add the particle */
	Particle_AddNew( asc->list, type, start - asc->source );
	
	return here;
	}
