/* A simplified compiler for the PDP8/E mnemonics.
   The output will be binary and a listing file.
   The binary output format for a field change is
   unknown, so the field is written like an origin,
   except that only two digits are output, followed
   immediately by an origin.
   The end of the binary output file will have
   leader/trailer code appended to tell the
   loader that the file is finished.
   NOTES:
   (1)    Field and start code may be unique to the
		PAL8 assembler used under OS8, and the
		binary file may have been entirely different
		than the standard for paper tape.
   (2)    Offset prompt should not be a +.
   (3)    Octal object of standard code is treated as
		an offset.  Should have prompt for this. Now
		is confusing.
*/

#include  <stdio.h>
#include  <string.h>
#include  <stdlib.h>

/* Input file line buffer.
 * iostring holds the line being tokenised (strtok overwrites it),
 * copy holds an untouched duplicate used for the listing and for
 * string text, tokenptr is the next argument to hand to strtok and
 * token is the token most recently returned by it.
 */
#define STRMAX  256
char      iostring[STRMAX], copy[STRMAX], *tokenptr, *token;

/* Decode state flags */
#define   TRUE      1
#define   FALSE     0
#define   GRPERR    2    /* Error while processing group mode */

/* Switch flags for the token decode array (values stored in Optype) */
#define   Nomatch   0    /* Could not decode the string */
#define   Standard  1    /* AND, TAD, ISZ and DCA */
#define   Delayed   2    /* JMP and JMS (delayed effect) */
#define   Groupclr  3    /* CLA in OPR groups (common to all) */
#define   Group1    4    /* Group 1 OPR */
#define   Group1r   5    /* Group 1 RAR, RTR, RAL, RTL or BSW */
#define   Group1end 6    /* Group1 CIA, STA and STL (do not combine) */
#define   Group2    7    /* Group 2 OSR and HLT (combines) */
#define   Group2or  8    /* Group 2 "SMA or SZA or SNL" */
#define   Group2and 9    /* Group 2 "SPA and SNA and SZL" */
#define   Group2end 10   /* Group 2 SKP (does not combine) */
#define   Group3    11   /* Group 3 OPR */
#define   IO        12   /* Input/output code */
#define   Mpage     13   /* Memory page directive */
#define   Mfield    14   /* Memory field directive */
#define   Indirect  15   /* Indirect field access */
#define   Page0     16   /* Page 0 access (drops current page bit) */
#define   Octalnbr  17   /* Octal number */
#define   Offset    18   /* Page location offset */
#define   Comment   19   /* Rest of line (including token) is a comment */
#define   Nodata    20   /* At end of line */
#define   Nocompile 21   /* Are not able to compile yet */
#define   Noline    22   /* Empty line */
#define   Absolute  23   /* Absolute address */
#define   Labeldef  24   /* Label origin */
#define   Labelref  25   /* Label use */
#define   Dset      26   /* Data field change (type of the CDF entry) */
#define   Iset      27   /* Instruction field change (type of the CIF entry) */
#define   String    28   /* Text string inclusion */
#define   Quote     29   /* Rest of line (except delimiter) is the string */

/* State flags for the binary output data (values stored in incr) */
#define   CPUcode   1    /* CPU instruction */
#define   Origin    2    /* Instruction origin */
#define   Ifield    3    /* Instruction field */
#define   Str_8     4    /* Text string (8 bit) */

/* Token decode structure and array.
   Each entry maps one symbol string (mnemonic, directive or label)
   to a decode type and a value.
   NOTE(review): this comment used to state that the first element
   (instruction string) must be sorted, but the initialised table is
   not in sorted order and opmatch() scans it linearly from the
   start - confirm whether any ordering is still required.
*/
struct token
	{
	char           *Instr;   /* Symbol text; NULL ends the filled part */
	unsigned int   Type;     /* One of the token decode switch flags   */
	unsigned int   Code;     /* Value delivered in Opdata on a match   */
	};

/* Decode array has the following items:
**   (1) Pointer to symbol label string.
**   (2) Symbol type.
**   (3) Symbol value.
*/

/* Capacity of the decode array: built-in symbols plus every label
 * that oplabel() appends during pass 1.
 */
#define   MAXTOKEN  500

/* Decode table.  The initialised part holds the built-in mnemonics
 * and directives and ends with an entry whose Instr is NULL; the
 * remaining elements are zero-initialised.  compile() counts the
 * filled entries into PDPsize and oplabel() stores labels from that
 * position onwards.  PDPptr is a scratch cursor into the table.
 */
struct token   *PDPptr, PDPdata[MAXTOKEN] = 
	{
		{"AND",    Standard, 00000},
		{"TAD",    Standard, 01000},
		{"ISZ",    Standard, 02000},
		{"DCA",    Standard, 03000},
		{"JMS",    Delayed,  04000},
		{"JMP",    Delayed,  05000},
		{"PAGE",   Mpage,    0},
		{"FIELD",  Mfield,   0},
		{"CLA",    Groupclr, 07200},
		{"CLL",    Group1,   07100},
		{"CMA",    Group1,   07040},
		{"CML",    Group1,   07020},
		{"RAR",    Group1r,  07010},
		{"RTR",    Group1r,  07012},
		{"RAL",    Group1r,  07004},
		{"RTL",    Group1r,  07006},
		{"BSW",    Group1r,  07002},
		{"IAC",    Group1,   07001},
		{"NOP",    Group1end,     07000},  /* Stand alone      */
		{"CIA",    Group1end,     07041},
		{"STL",    Group1end,     07120},
		{"STA",    Group1end,     07240},
		{"GLK",    Group1end,     07204},  /* Get link to AC11 */
		{"HLT",    Group2,   07402},
		{"OSR",    Group2,   07404},
		{"SMA",    Group2or, 07500},
		{"SZA",    Group2or, 07440},
		{"SNL",    Group2or, 07420},
		{"SPA",    Group2and,     07510},
		{"SNA",    Group2and,     07450},
		{"SZL",    Group2and,     07430},
		{"SKP",    Group2end,     07410},
		{"LAS",    Group2end,     07604},  /* Load AC with SR  */
		{"MQA",    Group3,   07501},
		{"MQL",    Group3,   07421},
		{"CAM",    Group3,   07621},
		{"SWP",    Group3,   07521},
		{"I",      Indirect, 00400},   /* ORed into the instruction   */
		{"Z",      Page0,    07577},   /* ANDed: clears the 0200 bit  */
		{"ION",    IO,       06001},
		{"SKON",   IO,       06000},
		{"SRQ",    IO,       06003},
		{"IOF",    IO,       06002},
		{"GTF",    IO,       06004},
		{"RTF",    IO,       06005},
		{"SGT",    IO,       06006},
		{"CAF",    IO,       06007},
		{"KCF",    IO,       06030},
		{"KSF",    IO,       06031},
		{"KCC",    IO,       06032},
		{"KRS",    IO,       06034},
		{"KIE",    IO,       06035},
		{"KRB",    IO,       06036},
		{"TFL",    IO,       06040},
		{"TSF",    IO,       06041},
		{"TCF",    IO,       06042},
		{"TPC",    IO,       06044},
		{"TSK",    IO,       06045},
		{"TLS",    IO,       06046},
		{"CDF",    Dset,     06201},
		{"CIF",    Iset,     06202},
		{"RDF",    IO,       06214},
		{"RIF",    IO,       06224},
		{"RIB",    IO,       06234},
		{"RMF",    IO,       06244},
		{"CINT",   IO,       06204},
		{"SINT",   IO,       06254},
		{"CUF",    IO,       06264},
		{"SUF",    IO,       06274},
		{"TEXT",   String,       0},   /* Separator was missing here  */
		/* Character constants, emitted as plain data words */
		{"CR",     IO,       0X0D},
		{"LF",     IO,       0X0A},
		{"TAB",    IO,       0X09},
		{"NULL",   IO,       0X00},
		{NULL,     0,        0}        /* End of the built-in entries */
	};
char *error;        /* Points to an error message  */
unsigned int   Optype, Opdata;     /* Token decode type and data      */
unsigned int   Group;              /* OPR type flag for decode        */
unsigned int   PC, Field;          /* Memory field and location       */
unsigned int   Bincode;            /* Binary program code             */
unsigned int   Chksum;             /* Output file checksum            */
/* Current source line number, current pass (1 or 2) and the number
 * of filled entries in the decode array. */
int line, pass, PDPsize;


/* Program entry point.
 *
 * Expects exactly one argument, the base name of the program.  The
 * source is read from <name>.ASM and the binary is written to
 * <name>.BIN; the listing goes to stdout.  The output file gets
 * leader code, the compiled program, the checksum as two 6 bit
 * bytes and trailer code.
 *
 * Returns 0 on success and 1 when the argument is missing or too
 * long or a file cannot be opened or closed.  Write errors end the
 * program through fatal().
 */
int main(int argc, char *argv[])
{
FILE      *fin, *fout;

	if (argc != 2)
	{
		printf("Must specify an input file\n");
		return(1);
	}

	/* The name plus a four character extension and the terminator
	 * must fit in the shared buffer.  The old strncpy/strncat pair
	 * limited each call to STRMAX, which does not stop the combined
	 * result from running past the end of copy.
	 */
	if (strlen(argv[1]) + strlen(".ASM") >= STRMAX)
	{
		printf("File name too long\n");
		return(1);
	}

	strcpy(copy, argv[1]);
	strcat(copy, ".ASM");
	if ((fin = fopen(copy, "r")) == NULL)
	{
		printf("Could not open input file %s\n", copy);
		return(1);
	}

	strcpy(copy, argv[1]);
	strcat(copy, ".BIN");
	if ((fout = fopen(copy, "wb")) == NULL)
	{
		printf("Could not open output file %s\n", copy);
		fclose(fin);
		return(1);
	}

	ltcode(fout);           /* Initial leader-trailer code */
	compile(fin, fout);
	printf("\n\tChecksum= %o\n", Chksum);
	/* The checksum bytes are written directly so that they are not
	 * added to the checksum itself. */
	if((putc(((Chksum>>6)&077), fout)==EOF)||(putc((Chksum&077), fout)==EOF))
		fatal("Error in writing checksum to output file");
	ltcode(fout);           /* Must have to end */

	/* Close both streams explicitly (fcloseall is not standard C).
	 * copy no longer holds the file name here because compile()
	 * reuses it as a line buffer, so the message does not print it.
	 */
	fclose(fin);
	if (fclose(fout) == EOF)
	{
		printf("Error in closing output file\n");
		return(1);
	}
	return(0);
}

/* Get lines from the source, compile and output. 
 *  Start with single pass and no labels.
 */
compile(fin, fout)

FILE *fin, *fout;

{
int       incr, len;

	/* Find size of the filled part of the decode array */
	for(PDPsize= 0,PDPptr= PDPdata; PDPsize < MAXTOKEN; PDPsize++,PDPptr++)
		if(PDPptr->Instr == NULL) break;
	for(pass= 1; pass <= 2; pass++)
	{
		/* Decode each line of source till done */
		line=     0;
		PC=       0200;               /* Default start */
		Field=    0;
		Chksum=   0;
		while(fgets(iostring, STRMAX, fin) != NULL)
		{
			strcpy(copy, iostring);  /* Token decode wrecks original    */
			error=    '\0';          /* Clear the error message         */
			Bincode=  0;             /* Default condition               */
			line++;                  /* Next line number                */
			tokenptr= iostring;      /* First decode                    */
			incr=     FALSE;         /* Some inputs do not advance      */
			Group=    FALSE;         /* If OPR code is set to type      */
			Optype=   9999;          /* Ready to go (impossible value)  */

			if(pass==1)
			{
				/* PASS #1:
				*   Puts labels in the array and checks for
				*   errors in label definitions.  Memory
				*   origin and field settings cannot have
				*   a label.  Labels without an object take
				*   the value of the PC but do not advance
				*   it.
				*/

				decode();
				if(Optype==Labeldef)
				{
					Optype= Labelref;
					if(!oplabel(token))
					{
						errpnt();
						continue;
					}
					decode();
					if((Optype==Comment)||(Optype==Nodata))
						continue;
					else if((Optype==Mpage)||(Optype==Mfield)||
						   (Optype==Offset)||(Optype==Absolute))
						fatal("Label defined at an origin");
					else
					{
						incpc();
						continue;
					}
				}
				else
				{
					switch(Optype)
					{
						case Comment:
						case Nodata:
							break;
						case Mpage:
							decode();
							if(Optype==Octalnbr)
							{
								if(Opdata <= 037) 
								{
									PC = Opdata*0200;
									incr= Origin;
									decode();
								}
								else fatal("Page number too large");
							}
							else
							{
								PC &= 07600;   /* Drop offset */
								PC |= 00200;   /* Next page   */
								PC &= 07777;   /* Wrap around */
								incr= Origin;
								break;
							}
							break;
						case Mfield:
							decode();
							if(Optype==Octalnbr)
							{
								if(Opdata <= 7)
								{
									Field= Opdata;
									PC= 0200;
									incr= Ifield;
									decode();
									break;
								}
								else fatal("Field number too large");
								break;
							}
							fatal("Field setting must have number");
							break;
						case Offset:
							if(Opdata <= 0177)
							{
								PC &= 07600;
								PC |= Opdata;
								incr= Origin;
								decode();
								break;
							}
							/*error= "Relative offset too large";*/
							break;
						case Absolute:
							if(Opdata <= 07777)
							{
								PC = Opdata;
								incr= Origin;
								decode();
								break;
							}
							/*error= "Relative offset too large";*/
							break;
						case String:
							decode();
							if(Optype==Quote)
							{
								token= copy+(token-iostring);
								token++; /* Bypass delimiter */
								while(Bincode= *token++)
								{
									if(Bincode=='\n')
									{
										incpc(); /* End of string null */
										break;
									}
									else
									{
										incpc();
									}
								}
								break;
							}
							/*error= "Incorrect (or missing) string delimiter.";*/
							break;
						default:
							incpc();
					}
					continue;
				}
			}

			if(pass==2)
			{
				/* PASS #2:
				 *   Compiles using the labels from the
				 *   first pass and ouputs the binary
				 *   data file.
				 * This is the serious part! Should exit the switch with
				 * either an error, comment, end of the line or no line.
				 * Each case should do an additional decode if necessary.
				 */
				do
				{
					decode();                     /* Get initial code      */
					if(Optype==Labeldef)          /* Bypass label def.     */
					{
						if(!opmatch(token))
							fatal("Label missing on Pass 2 (Compiler error)");
						else if(Opdata != PC)
							error= "Phasing error (label changed on Pass 2)";
						else
							decode();
					}
					switch(Optype)
					{
						case Comment:            /* Ignore rest of line   */
						case Nodata:             /* Blank line            */
							if(!Group) Optype= Noline;    /* Bypass      */
						case Nomatch:            /* Could not match token */
							if(Group) Group= FALSE;
							break;
						case Mpage:
							if(Group)
							{
								Group= FALSE;
								break;
							}
							decode();
							if(Optype==Octalnbr)
							{
								if(Opdata <= 037) 
								{
									PC = Opdata*0200;
									incr= Origin;
									decode();
								}
								/*else error= "Page number too large";*/
							}
							else
							{
								PC &= 07600;   /* Drop offset */
								PC |= 00200;   /* Next page   */
								PC &= 07777;   /* Wrap around */
								incr= Origin;
								break;
							}
							break;
						case Mfield:
							if(Group)
							{
								Group= FALSE;
								break;
							}
							decode();
							if(Optype==Octalnbr)
							{
								if(Opdata <= 7)
								{
									Field= Opdata;
									PC= 0200;
									incr= Ifield;
									decode();
									break;
								}
								/*else error= "Field number too large";*/
								break;
							}
							/*error= "Field setting must have number";*/
							break;
						case Octalnbr:           /* Straight code setting */
						case Labelref:
						case Group1end:
						case Group2end:
						case IO:
							if(Group)
							{
								Group= FALSE;
								break;
							}
							Bincode= Opdata;
							incr= CPUcode;
							decode();
							break;
						case String:
							if(Group)
							{
								Group= FALSE;
								break;
							}
							decode();
							if(Optype==Quote)
							{
								token= copy+(token-iostring);
								token++; /* Bypass delimiter */
								incr= Str_8; /* 8 bit text string */
								break;
							}
							error= "Incorrect (or missing) string delimiter.";
							break;
						case Dset:
						case Iset:
							if(Group)
							{
								Group= FALSE;
								break;
							}
							Bincode= Opdata;
							decode();
							while((Optype==Dset)||(Optype==Iset))
							{
								Bincode |= Opdata;
								decode();
							}
							if(Optype==Octalnbr)
							{
								if(Opdata <= 7)
								{
									Opdata <<= 3;  /* Shift left by 3 */
									Bincode |= Opdata;
									incr= CPUcode;
									decode();
									break;
								}
								else
								{
									error= "Field number too large";
									break;
								}
							}
							else
							{
								error= "Field change must have number";
								break;
							}
						case Offset:             /* Relative page offset  */
							if(Group)
							{
								Group= FALSE;
								break;
							}
							if(Opdata <= 0177)
							{
								PC &= 07600;
								PC |= Opdata;
								incr= Origin;
								decode();
								break;
							}
							error= "Relative offset too large";
							break;
						case Absolute:           /* Absolute address      */
							if(Group)
							{
								Group= FALSE;
								break;
							}
							if(Opdata <= 07777)
							{
								PC = Opdata;
								incr= Origin;
								decode();
								break;
							}
							error= "Relative offset too large";
							break;
						case Delayed:            /* JMP and JMS */
						case Standard:           /* AND, TAD, ISZ and DCA */
							if(Group)
							{
								Group= FALSE;
								break;
							}
							Bincode= Opdata|0200;
							decode();
							while((Optype==Indirect)||(Optype==Page0))
							{
								if(Optype==Indirect)
									Bincode |= Opdata;
								else
									Bincode &= Opdata;
								decode();
							}
							if((Optype==Octalnbr)||(Optype==Labelref)||(Optype==Offset))
							{
								if((Optype==Labelref)||(Optype==Octalnbr))
								{
									if(Bincode&0200)    /* Direct? */
									{
										if((PC&07600)==(Opdata&07600))
											Opdata &= 0177;
										else
										{
											error= "Label reference in wrong page";
											break;
										}
									}
								}
								if(Opdata <= 0177)  /* Offset */
								{
									Bincode |= Opdata;
									incr= CPUcode;
									decode();
								}
								else
									error= "Relative offset too large";
								break;
							}
							error= "Incorrect code object";
							break;
						case Groupclr:
							incr= CPUcode;
							Bincode |= Opdata;  /* Can be in groups 1, 2 or 3 */
							if(!Group) Group= Groupclr;
							break;
						case Group1r:
						case Group1:
							/* Includes Groupclr and Group1. Group1r defaults
							* to Group1.
							*/
							incr= CPUcode;
							if((!Group)||(Group==Groupclr)||(Group==Group1))
							{
								Bincode |= Opdata;
								Group= Group1;
							}
							else if(Group==Group1r)
							{
								Bincode &= 07761;
								Bincode |= Opdata;
								Group= Group1;
							}
							else Group= FALSE;
							break;              /* If looping does decode     */
						case Group2:
							incr= CPUcode;
							if((!Group)||(Group==Groupclr)||(Group==Group2)
								||(Group==Group2and)||(Group==Group2or))
							{
								Bincode |= Opdata;
								if((Group!=Group2and)&&(Group!=Group2or))
									Group= Group2;
							}
							else Group= FALSE;
							break;
						case Group2and:
							incr= CPUcode;
							if((!Group)||(Group==Groupclr)||(Group==Group2)
								||(Group==Group2and))
							{
								Bincode |= Opdata;
								Group= Group2and;
							}
							else Group= FALSE;
							break;
						case Group2or:
							incr= CPUcode;
							if((!Group)||(Group==Groupclr)||(Group==Group2)
								||(Group==Group2or))
							{
								Bincode |= Opdata;
								Group= Group2or;
							}
							else Group= FALSE;
							break;
						case Group3:
							incr= CPUcode;
							if((!Group)||(Group==Groupclr)||(Group==Group3))
							{
								Bincode |= Opdata;
								Group= Group3;
							}
							else Group= FALSE;
							break;
						default:
							/* If here cannot compile */
							Optype= Nocompile;
							error= "Cannot compile in this version";
					}
				}
				while(Group);
				/* Done! Print the line and send code to the output file */
				/* Code output */
				if(incr==Origin)
				{
					putcode((0100|((PC>>6)&077)), fout); /* Origin & high byte */
					putcode((PC&077), fout);            /* low byte           */
				}
				else if(incr==CPUcode) 
				{
					putcode(((Bincode>>6)&077), fout);
					putcode((Bincode&077), fout);
				}
				else if(incr==Str_8)
				{
					printf("%04d %02o %04o\t\t%s",line,Field,PC,copy);
					while(Bincode= *token++)
					{
						if(Bincode=='\n')
						{
							outcode(0, fout);
							printf("\n");
							break;
						}
						else
						{
							outcode(Bincode, fout);
						}
					}
				}
				else if(incr==Ifield)
				{
					putcode((0300|(Field&077)), fout);
					putcode((0100|((PC>>6)&077)), fout);
					putcode((PC&077), fout);
				}
				/* Print the line and update the PC */
				if(Optype==Noline) printf("%04d \t\t\t%s", line, copy); 
				else
				{
					if(*error!='\0') errpnt();
					else if((Optype != Nodata)&&(Optype != Comment)&&(Optype != Quote))
					{
						error= "Extra data on the line";
						errpnt();
					}
					if(*error!='\0') printf("%04d \t\t\t%s", line, copy);
					else
					{
						if(incr==CPUcode)
						{
							printf("%04d %02o  %04o  %04o\t%s",line,Field,PC,Bincode,copy);
							printf("%04d %02o $%04X $%04X\n\n",line,Field,PC,Bincode);
							incpc();
						}
						else if(incr==Str_8) continue;
						else
							printf("%04d %02o %04o\t\t%s",line,Field,PC,copy);
					}
				}
				continue;
			}
		}
	/* Rewind for next pass */
	fseek(fin, 0, SEEK_SET);
	}
}

/* Decode the next token of the current line.
 *
 * The token is taken with strtok(): the first call for a line uses
 * tokenptr (set by the caller), later calls continue in the same
 * line.  The result is left in the globals token, Optype and Opdata:
 *
 *   starts with '/'     Comment
 *   starts with '"'     Quote
 *   '+' and octal       Offset, value in Opdata
 *   '*' and octal       Absolute, value in Opdata
 *   ends with ','       Labeldef, comma removed, Opdata = PC
 *   known symbol        type and value from the decode array
 *   octal digits only   Octalnbr, value in Opdata
 *   no token left       Nodata
 *
 * Returns TRUE when the token was classified and FALSE otherwise,
 * with error pointing to a message.  A lone "," ends the program
 * through fatal().
 */
int decode(void)
{
	int len;
	char sep[]= " \t\n";

	token= strtok(tokenptr, sep);
	tokenptr= NULL;          /* Further calls continue in this line */
	if(token == NULL)
	{
		Optype= Nodata;
		return(TRUE);
	}

	len= strlen(token);
	/* Check for comment, page offset
	 * or absolute address.
	 */
	if(*token=='/') Optype= Comment;
	else if(*token=='"') Optype= Quote;
	else if(*token=='+')
	{
		if(octal(token+1))
			Optype= Offset;
		else
		{
			error= "Incorrect page offset";
			return(FALSE);
		}
	}
	else if(*token=='*')
	{
		if(octal(token+1))
			Optype= Absolute;
		else
		{
			error= "Incorrect address";
			return(FALSE);
		}
	}
	else if(token[--len]==',')
	{
		/* len is now the index of the comma, so the label itself
		 * needs at least one character in front of it. */
		if(len >= 1)
		{
			token[len]= '\0';
			Optype= Labeldef;
			Opdata= PC;
		}
		else fatal("Illegal label definition");
	}
	/* See if a standard directive (including
	 * label reference) or isolated number 
	 */
	else if(opmatch(token))
	{
		/* Optype and Opdata were set by opmatch() */
	}
	else if(octal(token))
	{
		/* opmatch() left its "not found" message behind.  Point at
		 * an empty string to clear it; storing a null character
		 * through error would write into that string literal. */
		error= "";
	}
	else
	{
		Optype= Nomatch;
		error= "Could not decode";
		return(FALSE);
	}
	return(TRUE);
}

/* Read a string of octal digits into Opdata.
 *
 * When every character is in the range '0'..'7' the value is left in
 * Opdata, Optype becomes Octalnbr and TRUE is returned; an empty
 * string counts as 0.  At the first other character error is set,
 * Opdata keeps the value gathered so far, Optype is not touched and
 * FALSE is returned.
 */
int octal(char *strptr)
{
	char *cursor= strptr;

	Opdata= 0;
	while(*cursor != '\0')
	{
		char digit= *cursor++;

		if((digit < '0') || (digit > '7'))
		{
			error= "Not an octal number";
			return(FALSE);
		}
		/* Three bits per digit */
		Opdata= (Opdata << 3) + (digit - '0');
	}
	Optype= Octalnbr;
	return(TRUE);
}

/* Compare two strings without regard to the case of the letters
 * a-z.  Returns TRUE when they are equal and FALSE otherwise.
 * Stands in for strcmpi(), which is not part of standard C and is
 * not declared by the headers this file includes.
 */
static int nocase_equal(const char *left, const char *right)
{
	unsigned char a, b;

	do
	{
		a= (unsigned char)*left++;
		b= (unsigned char)*right++;
		if((a >= 'a') && (a <= 'z')) a= (unsigned char)(a - 'a' + 'A');
		if((b >= 'a') && (b <= 'z')) b= (unsigned char)(b - 'a' + 'A');
		if(a != b) return(FALSE);
	}
	while(a != '\0');
	return(TRUE);
}

/* Find decode array match to the given string.
 * The first PDPsize entries are searched in order, ignoring the
 * case of letters.  If found sets Optype and Opdata from the entry,
 * leaves PDPptr on it and returns TRUE, else sets error, returns
 * FALSE and Optype is unchanged.
 */
int opmatch(char *optoken)
{
	int  i;

	for(PDPptr= PDPdata, i= 0; i < PDPsize; i++, PDPptr++)
	{
		if(nocase_equal(PDPptr->Instr, optoken))
		{
			Optype= PDPptr->Type;
			Opdata= PDPptr->Code;
			return(TRUE);
		}
	}
	error= "Not a valid opcode";
	return(FALSE);
}

/* Put label in the decode array.
 * Routine may be used for other things so have Optype and Opdata
 * already set up: they are stored as the type and value of the new
 * entry.
 *
 * Returns TRUE when the entry was added.  Returns FALSE with error
 * set when the name is already in the array; in that case opmatch()
 * has overwritten Optype and Opdata with the existing entry.  A full
 * array or a failed allocation ends the program through fatal().
 *
 * The name is copied to allocated memory that is kept for the rest
 * of the run.  The copy is made with malloc() and checked, because
 * an unchecked null name would later be handed to the string
 * compare in opmatch().
 */
int oplabel(char *lbltoken)
{
	char *name;

	if(PDPsize >= MAXTOKEN)
	{
		fatal("Label array full");
	}
	if(opmatch(lbltoken))
	{
		error= "Label redefinition";
		return(FALSE);
	}
	name= malloc(strlen(lbltoken) + 1);
	if(name == NULL)
	{
		fatal("Out of memory while storing label");
	}
	strcpy(name, lbltoken);
	/* Put label in array */
	PDPptr= &PDPdata[PDPsize];
	PDPptr->Instr= name;
	PDPptr->Type=  Optype;
	PDPptr->Code=  Opdata;
	/* Increment the number of array entries */
	PDPsize++;
	return(TRUE);
}

/* Step the program counter to the next location, wrapping from
 * 07777 back to 0.  Declared int because it is called before this
 * definition without a prototype; no value is returned.
 */
int incpc(void)
{
	PC= (PC + 1) & 07777;
}

/* Write one byte of binary output and add it to the checksum.
 *   PDPcode  Byte value to write.
 *   fpout    Binary output file.
 * The checksum is updated before the write is attempted.  A failed
 * write ends the program through fatal().  No value is returned.
 */
int putcode(unsigned int PDPcode, FILE *fpout)
{
	int written;

	Chksum= Chksum + PDPcode;
	written= putc(PDPcode, fpout);
	if(written == EOF)
		fatal("Error in writing data to output file");
}

/* Write leader/trailer code: ten bytes of 0200 (about 1 inch if
 * paper tape).  The bytes go through putcode() and are therefore
 * added to the checksum as well.  No value is returned.
 */
int ltcode(FILE *fpout)
{
	int remaining= 10;

	while(remaining-- > 0)
		putcode(0200, fpout);
}
/* If error message, print it and
 * return TRUE, else return FALSE.
 */

errpnt()
{
	if(*error == '\0') return(FALSE);
	/*printf("%s", copy);*/
	printf("\tError at line %d: %s\n", line, error);
	return(TRUE);
}

/* Print fatal error message and exit*/
fatal(message)
char *message;
{
	printf("\tFatal error at line %d: %s\n", line, message);
	exit(1);
}

/* Output one word of straight binary code.
 * Prints the word in octal on the listing, writes it to the file as
 * two 6 bit bytes (high byte first) and advances the program
 * counter.  The parameter hides the global Bincode inside this
 * function.  No value is returned.
 */
int outcode(int Bincode, FILE *fpout)
{
	int high= (Bincode >> 6) & 077;
	int low=  Bincode & 077;

	printf("               %04o\n", Bincode);
	putcode(high, fpout);
	putcode(low, fpout);
	incpc();
}

