#include <string.h>
#include <bastard.h>
#include <i386_intcode.h>
/* mnemonic-to-int table */
#include "./i386_intcode.table"


/* Allocate one zero-filled INTCODE_STATE.  Yields NULL when calloc fails;
 * arch_state_del() releases the result. */
struct INTCODE_STATE * arch_state_new( void ) {
	struct INTCODE_STATE *fresh;

	fresh = calloc( 1, sizeof(struct INTCODE_STATE) );
	return( fresh );
}

/* Free every node on state->inst_list and state->var_list, then the state
 * itself.  Returns 0 when `state` is NULL, 1 otherwise. */
int arch_state_del( struct INTCODE_STATE *state ) {
	struct INTCODE_LINE *line, *next_line;
	struct INTCODE_VAR *var, *next_var;

	if ( state == NULL ) return(0);

	/* remember the successor before each node is released */
	for ( line = state->inst_list; line != NULL; line = next_line ) {
		next_line = line->next;
		free( line );
	}

	for ( var = state->var_list; var != NULL; var = next_var ) {
		next_var = var->next;
		free( var );
	}

	free( state );

	return(1);
}

/* Append a copy of int_code `i` to the tail of state->inst_list.
 * Returns 1 on success, 0 when `state` or `i` is NULL or allocation fails
 * (the list is left untouched in that case). */
int arch_state_inst_add( struct INTCODE_STATE *state, struct int_code *i ) {
	struct INTCODE_LINE *node;

	if ( ! state || ! i ) return(0);

	/* calloc leaves node->next NULL, which marks the new tail */
	node = calloc( 1, sizeof(struct INTCODE_LINE) );
	if ( ! node ) return(0);

	memcpy( &node->i, i, sizeof(struct int_code) );

	if ( ! state->inst_list ) {
		state->inst_list = node;
	} else {
		/* link first, move the tail pointer afterwards: doing both in one
		 * chained assignment reads and writes last_inst unsequenced */
		state->last_inst->next = node;
	}
	state->last_inst = node;

	return(1);
}

/* Append a copy of variable `v` to the tail of state->var_list.  The copy's
 * ->next is always reset to NULL, whatever `v->next` held.
 * Returns 1 on success, 0 when `state` or `v` is NULL or allocation fails
 * (the list is left untouched in that case). */
int arch_state_var_add( struct INTCODE_STATE *state, struct INTCODE_VAR *v ) {
	struct INTCODE_VAR *node;

	if ( ! state || ! v ) return(0);

	node = calloc( 1, sizeof(struct INTCODE_VAR) );
	if ( ! node ) return(0);

	/* fill the node completely before it becomes reachable from the list */
	memcpy( node, v, sizeof(struct INTCODE_VAR) );
	node->next = NULL;

	if ( ! state->var_list ) {
		state->var_list = node;
	} else {
		/* link first, move the tail pointer afterwards: doing both in one
		 * chained assignment reads and writes last_var unsequenced */
		state->last_var->next = node;
	}
	state->last_var = node;

	return(1);
}

/* Map the OP_SIGNED / OP_BYTE / OP_HWORD bits of an operand type onto the
 * matching imm_* size code.  OP_BYTE is tested before OP_HWORD; when neither
 * bit is set the word-sized code is returned. */
unsigned int arch_get_imm_opsize( unsigned int type ) {
	int is_signed = ( type & OP_SIGNED ) != 0;

	if ( type & OP_BYTE )
		return( is_signed ? imm_byte : imm_ubyte );
	if ( type & OP_HWORD )
		return( is_signed ? imm_hword : imm_uhword );
	return( is_signed ? imm_word : imm_uword );
}

/* this searches state->var_list for ->type == op_greg and ->orig == reg;
 * if found, it returns ->name, otherwise it adds a variable and returns
 * ->name of the new variable */
unsigned long arch_reg_find_create( long reg, struct INTCODE_STATE *state ) {
	struct INTCODE_VAR *v, new = {0};
	if (! state) return(0);
	v = state->var_list;
	while ( v ) {
		if ( v->type == op_greg && v->orig == reg ) {
			return( v->name );
		}
		v = v->next;
	}
	new.type = op_greg;
	new.name = state->gregs++;
	new.orig = reg;
	new.orig_type = OP_REG;
	arch_state_var_add( state, &new );
	return( new.name );
}

/* this makes in-place modifications to the code struct operands so that they
 * reflect allowed IN_CODE types and values ... meaning that the build_intcode
 * routine can obtain the operands from the code struct directly */
int arch_normalize_op( struct code *c, long *op, unsigned int *type,
				struct INTCODE_STATE *state ) {
	struct name n = {0};
	struct address a;
	struct addr_exp e;
	struct int_code i = {0};
	struct INTCODE_VAR v = {0};

	i.rva = c->rva;		/* we'll be needing this quite often */

	switch( *type & OP_TYPE_MASK ) {
		case OP_REG:
			if ( (*type & OP_W) && !(*type & OP_R) ) {
				/* add .clobber directive */
				i.opcode = DIR(i_clobber);
				i.src = *op;
				i.sType = *type;
				arch_state_inst_add( state, &i );
			}
			*type = op_greg;
			*op = arch_reg_find_create( *op, state );
			break;
		case OP_REL:
			/* set op to actual address for this instr */
			if (! bdb_index_find( ADDRESS_RVA, &c->rva, &a) )
				return(0);
			*op += a.rva + a.size;
		case OP_ADDR: case OP_OFF:
			*type = op_label;
			/* change address references to NAME references */
			if (! bdb_index_find( NAME_RVA, op, &n ) ) {
				//printf("creating name for %08x\n", *op );
				n.id = name_new_default( *op, NAME_NEWLOC );
			} //else printf("name for %08x is %d\n", *op, n.id );
			if ( ! n.id ) {
				//printf("fuck! %d %08X\n", sys_get_lasterr(), *op );
				sys_print_errmsg(sys_get_lasterr());
			}
			*op = n.id;
			//printf("NAME: now op is %d c->dest %d\n", *op, c->dest);
			break;
		case OP_PTR:
			*type = op_label | DEREF_OP;
			if (! bdb_index_find( NAME_RVA, op, &n ) ) {
				n.id = name_new_default( *op, NAME_NEWPTR );
			} 
			if ( ! n.id ) sys_print_errmsg(sys_get_lasterr());
			*op = n.id;
			break;
		case OP_EXPR:
			if (! bdb_index_find( ADDR_EXP_ID, op, &e ) ) {
				printf("crapola %d\n", *op);
				return(0);
			}
			/* if base_reg == ebp && !disp && !scale */
			if ( e.base == ext_arch->FP && ! e.index ) {
				/* this is an offset from BP: local var or arg */
				if ( e.disp > 0 ) {
					/* pos offset from Frame Pointer: arg */
					*type = op_ireg;
					/* TODO: fix this to not be word-specific */
					*op = e.disp / ext_arch->sz_word;
				} else {
					/* neg offset from FP: local var */
					*type = op_lreg;
					/* TODO: fix this to not be word-specific */
					*op = e.disp / ext_arch->sz_word;
				}
			} else {
				/* generate int_code instructions for address calc */
				/* add .calc directive */
				i.opcode = DIR(i_calc);
				i.src = e.id;
				arch_state_inst_add( state, &i );

				/* use a general register to hold operand */
				v.type = *type = op_greg;
				v.name = state->gregs++;
				arch_state_var_add( state, &v );
				*op = v.name;
				/* use general registers for base, index regs */
				e.base = arch_reg_find_create( e.base, state );
				e.index = arch_reg_find_create( e.index, state );

				/* add `mv e.index, op` */
				i.opcode = INSTR(i_mv);
				i.dest = i.arg = *op;
				i.dType = i.aType = op_greg;
				i.src = e.scale;
				i.sType = op_imm;
				arch_state_inst_add( state, &i );

				/* add `mul e.scale, op` : index *= scale */
				i.dest = i.arg = i.dType = i.aType = 0;
				i.opcode = INSTR(i_mul);
				i.src = e.scale;
				i.sType = op_imm;
				arch_state_inst_add( state, &i );

				/* add `add e.base, op`: index += base */
				i.opcode = INSTR(i_add);
				i.src = e.base;
				i.sType = op_greg;
				arch_state_inst_add( state, &i );

				/* add `add e.disp, op`: index+= disp */
				i.src = e.disp;
				i.sType = op_imm;
				arch_state_inst_add( state, &i );

				/* add .uncalc directive */
				memset( &i, 0, sizeof( struct int_code ) );
				i.opcode = DIR(i_uncalc);
				arch_state_inst_add( state, &i );
			}
			break;
		case OP_IMM:
		default:
			/* leave *op untouched */
			*type = op_imm | arch_get_imm_opsize( *type );
	}
	return(1);
}
	
/* prepare operands -- this is really just a front-end to arch_normalize_op.
 * Each of src, dest and aux whose type is non-zero is normalized, and all
 * of them are attempted even when an earlier one fails.
 * Returns 1 when every attempted normalization succeeded, 0 otherwise. */
int arch_normalize_operands( struct code *c, struct INTCODE_STATE *state ){ 
	int ok = 1;

	if ( c->srcType && 
	     ! arch_normalize_op( c, &c->src, &c->srcType, state ) )
		ok = 0;
	if ( c->destType && 
	     ! arch_normalize_op( c, &c->dest, &c->destType, state ) )
		ok = 0;
	if ( c->auxType && 
	     ! arch_normalize_op( c, &c->aux, &c->auxType, state ) )
		ok = 0;
	return( ok );
}

/* this is intended to convert Intel instructions such as MOV and PUSH, which
 * handle memory (load/store) and register (move) transfers, to more specific
 * load/store/move versions ot eh instructions */
int arch_normalize_mnemonic( struct code *c ) {
	/* cmpxchg, mov, movsx, movzx, pop, push, set??, xadd, xchg */ 

	/* do a quick one-char check to see if we can skip the strcmps */
	if (  c->mnemonic[0] != 'p' && c->mnemonic[0] !='m' && 
		c->mnemonic[0] != 'x' && c->mnemonic[0] != 'c' && 
		c->mnemonic[0] != 's' )
		return(1);

	if ( ! strcmp( "pop", c->mnemonic ) || ! strcmp( c->mnemonic, "set" ) ) {
		/* affix a 'st' to mnemonic if mem arg */
		if ( c->destType != OP_REG && c->destType != OP_IMM  )
			strncat( c->mnemonic, "st", 16 );

	} else if ( ! strcmp( "push", c->mnemonic ) ) {
		/* affix a 'ld' to mnemonic if mem arg */
		if ( c->destType != OP_REG && c->destType != OP_IMM  )
			strncat( c->mnemonic, "ld", 16 );

	} else if ( ! strcmp( "mov", c->mnemonic ) || 
			! strcmp( "xchg", c->mnemonic) || 
			! strcmp( "movsx", c->mnemonic ) ||
			! strcmp( "movzx", c->mnemonic )  ) {
		/* affix a 'ld' if m->r, a 'st' if r->m, nothing if 'r->r' */
		if ( c->srcType != OP_REG && c->srcType != OP_IMM  )
			strncat( c->mnemonic, "ld", 16 );
		if ( c->destType != OP_REG && c->destType != OP_IMM  )
			strncat( c->mnemonic, "st", 16 );

	} else if ( ! strcmp( "xadd", c->mnemonic ) || 
			! strcmp( "cmpxchg", c->mnemonic ) ) {
		/* affix a 'st' if dest is a mem arg */
		if ( c->destType != OP_REG && c->destType != OP_IMM  )
			strncat( c->mnemonic, "st", 16 );
	}

	return(1);
}

/* Resolve one placeholder operand of a translation-table int_code.
 *
 *   op, type - operand value and type, rewritten in place
 *   c        - the instruction being translated; supplies src/dest/aux
 *              operands and the rva
 *   scratch  - list head; scratch->next chains the scratch registers
 *              allocated so far for this instruction
 *   regs     - register counter; post-incremented to name each new
 *              scratch register
 *
 * The USE_* placeholders are replaced with the corresponding value from
 * `c` or ext_arch.  An op_scratch operand is mapped to a general register:
 * an existing entry with the same ->orig is reused, otherwise a new one is
 * allocated and pushed on the front of the scratch list.  Any other type is
 * left as it is.
 *
 * Returns 1 on success, 0 when a scratch register cannot be allocated (in
 * which case *op and *type are left unchanged). */
int arch_int_fixop( unsigned long *op, unsigned long *type, struct code *c, 
			struct INTCODE_VAR *scratch, int *regs ) {
	struct INTCODE_VAR *v;

	switch( *type & BASE_OPTYPE_MASK ) {
		case USE_SRC:
			*type = c->srcType;
			*op = c->src;
			break;
		case USE_DEST:
			*type = c->destType;
			*op = c->dest;
			break;
		case USE_AUX:
			*type = c->auxType;
			*op = c->aux;
			break;
		case USE_CODEID:			/* used in .asm directive */
			*type = op_dir;
			*op = c->rva;
			break;
		case USE_WORDSIZE:
			*type = op_imm | imm_byte;
			*op = ext_arch->sz_word;
			break;
		case USE_DWORDSIZE:
			*type = op_imm | imm_byte;
			*op = ext_arch->sz_dword;
			break;
		case op_scratch:
			v = scratch->next;
			while ( v ) {	/* has scratch reg been allocated? */
				if ( v->orig == *op ) 	break;		/* yup */
				v = v->next;
			}
			if ( ! v ) {	/* scratch register not allocated */
				v = calloc( 1, sizeof(struct INTCODE_VAR) );
				if ( ! v ) return(0);	/* out of memory */
				v->next = scratch->next;
				scratch->next = v;
				v->name = (*regs)++;
				v->type = op_greg;
				v->orig = *op;
			}
			*op = v->name;
			*type = v->type;
			break;
		/* case USE_FUNCID:  this will never occur */
		/* case op_greg: case op_ireg: case op_oreg:
		case op_lreg: case op_specreg: case op_rreg:
		case op_imm: case op_label: case op_dir: */
		default:
			/* these don't need to be fixed */
			break;
	}
	return(1);
}

/* Run arch_int_fixop over the src, arg and dest operands of `i`, skipping
 * any operand whose type is zero.  All three are attempted even when one
 * fails.  Returns 1 when every attempted fix-up succeeded, 0 otherwise. */
int arch_int_fixinst( struct int_code *i, struct code *c, 
			struct INTCODE_VAR *scratch, int *regs  ) {
	int ok = 1;

	if ( i->sType && 
	     ! arch_int_fixop( &i->src, &i->sType, c, scratch, regs ) )
		ok = 0;
	if ( i->aType && 
	     ! arch_int_fixop( &i->arg, &i->aType, c, scratch, regs ) )
		ok = 0;
	if ( i->dType && 
	     ! arch_int_fixop( &i->dest, &i->dType, c, scratch, regs ) )
		ok = 0;

	return( ok );
}

/* lookup the int_code translation entry for this mnemonic.
 * xlat_tbl is indexed by the lower-cased first letter ('a' -> 0 .. 'z' ->
 * 25); the selected sub-table is scanned until an entry with a NULL
 * ->mnemonic.  The comparison itself is an exact, case-sensitive strcmp.
 * Returns the matching entry, or NULL when `mnemonic` is NULL, does not
 * start with a letter, or has no entry. */
struct INTCODE_XLAT * arch_int_from_mnemonic( char *mnemonic ) {
	int x, index;
	struct INTCODE_XLAT *table;

	if ( ! mnemonic ) return(NULL);

	/* tolower needs an unsigned char value; a negative char is undefined */
	index = tolower( (unsigned char) mnemonic[0] ) - 'a';
	/* anything sorting below 'a' (digits, '\0', punctuation) gives a
	 * negative index, so both bounds have to be checked */
	if ( index < 0 || index >= 26 ) return(NULL);

	table = xlat_tbl[index];
	if ( ! table ) return(NULL);

	for ( x = 0; table[x].mnemonic; x++ ) {
		if ( ! strcmp( mnemonic, table[x].mnemonic) ) {
			return( &table[x] );
		}
	}
	return(NULL);
}



/* Translate one disassembled instruction `c` into int_code lines appended
 * to `state`.  The mnemonic is normalized and looked up in the translation
 * table; when no entry exists nothing is emitted.  Otherwise the operands
 * are normalized, each template int_code of the entry is copied, stamped
 * with c->rva, fixed up and appended, and finally one .clobber directive is
 * emitted per scratch register used, releasing that register's number.
 * Always returns 1. */
int arch_int_translate( struct code *c, struct INTCODE_STATE *state )	{
	int x;
	struct int_code i = {0};
	struct INTCODE_XLAT *ix;
	struct INTCODE_VAR scratch={0}, *v;

	/* normalize mnemonics */
	arch_normalize_mnemonic( c );
	/* get int_code mnemonic for this instruction */
	ix = arch_int_from_mnemonic( c->mnemonic );
	if ( ! ix ) return(1);

	/* normalize operands */
	arch_normalize_operands( c, state );

	/* fixup and add to state all INT_CODEs associated with mnem */
	for ( x = 0; x < ix->count; x ++ ){
		/* work on a private copy and stamp the rva there, so the
		 * shared translation table is never written to */
		memcpy( &i, &ix->i[x], sizeof( struct int_code ) );
		i.rva = c->rva;
		arch_int_fixinst( &i, c, &scratch, &state->gregs );
		/* add to list representing this fn */
		arch_state_inst_add( state, &i );
	}

	/* clobber scratch registers */
	v = scratch.next;
	while ( v ) {
		memset( &i, 0, sizeof( struct int_code ));
		/* generate .clobber directive */
		i.rva = c->rva;
		i.opcode = DIR(i_clobber);
		i.src = v->name;
		i.sType = v->type;
		arch_state_inst_add( state, &i );
		/* give the register number back and drop the list node */
		state->gregs--;		
		scratch.next = v->next;
		free(v);
		v = scratch.next;
	}

	return(1);
}

/* API entry point: generate intermediate code for a function.
 *
 * Emits a .proc and a .block directive, translates every CODE record whose
 * rva lies in [f->rva, f->rva + f->size), emits .unblock, then numbers the
 * resulting int_codes (order 0, 10, 20, ...) and inserts each one into the
 * INT_CODE table.
 *
 * Returns 1 on success, 0 when `f` is NULL or the working state cannot be
 * allocated.  Failures of the individual add/translate/insert calls are not
 * reported. */
int gen_int(struct function *f) {
	int cont, *dbstate, order = 0;
	struct code c;
	struct int_code i = {0};
	struct INTCODE_LINE *il;
	struct INTCODE_STATE *state;

	if (! f) return(0);

	/* allocate before saving the DB state, so a failed allocation cannot
	 * return with a saved state that never reaches the closing call */
	state = arch_state_new();
	if (! state) return(0);
	dbstate = db_save_state();

	i.rva = f->rva;
	/* add item .proc */
	i.opcode = DIR(i_proc);
	i.src = f->id;
	arch_state_inst_add( state, &i );

	/* add item .block */
	i.opcode = DIR(i_block);
	i.src = 0;
	arch_state_inst_add( state, &i );

	/* add item .frame if approp */

	/* foreach instruction in function */
	cont =  bdb_index_find( CODE_RVA, &f->rva, &c );
	while ( cont && c.rva < f->rva + f->size ) {
		/* translate CODE inst and add to 'state' */
		arch_int_translate( &c, state );
		cont = bdb_index_next( CODE_RVA, &c );
	}

	/* add item .unframe if approp */

	/* add item .unblock */
	i.rva = f->rva + f->size - 1;
	i.opcode = DIR(i_unblock);
	arch_state_inst_add( state, &i );

	/* analyze list of int code instructions for redundancy, etc */
	/* TODO :) */


	/* add items in int_code list to INT_CODE table */
	il = state->inst_list;
	while ( il ) {
		il->i.func = f->id; 
		il->i.order = order;
		order += 10;	/* just like in basic: keep spare lines :P */
		/* add each int_code in state to DB */
		bdb_record_insert( INT_CODE, &il->i);
		il = il->next;
	}

	arch_state_del(state);
	/* NOTE(review): this passes the saved state back to db_save_state();
	 * a matching restore call was presumably intended -- confirm against
	 * the DB API before changing */
	db_save_state(dbstate);

	return(1);
}

