/*
 *    XML-comprifex is a small and fast programm for compressing XML-files.
 *    Copyright (C) 2009  Markus Pargmann (http://scosu.org)
 *
 *    This program is free software: you can redistribute it and/or modify
 *    it under the terms of the GNU General Public License as published by
 *    the Free Software Foundation, either version 3 of the License, or
 *    (at your option) any later version.
 *
 *    This program is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *    GNU General Public License for more details.
 *
 *    You should have received a copy of the GNU General Public License
 *    along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */




#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>
#include <math.h>
#include <string.h>

// searches for the first bit in a byte and returns the position
unsigned char get_first_one(unsigned int value)
{
	int j;
	for(j = sizeof(unsigned int)*8-1; j != -1; j--)
	{
		if(value & (1<<j))
		{
			break;
		}
	}
	return j+1;
}


// reads a given length from a char-array
unsigned char bin_read(unsigned char *values, unsigned int *pointer, unsigned char *shifter, unsigned char length)
{
	unsigned char result = 0;
	unsigned char i;
	if((*shifter) > length)
	{
		(*shifter) -= length;
		result |= values[*pointer] >> (*shifter);
	}
	else
	{
		result |= values[(*pointer)++] << (length-(*shifter));
		*shifter = 8-(length-(*shifter));
		result |= values[*pointer] >> (*shifter);
	}
	for(i = 7; i >= length; i--)
	{
		result &= ~(1 << i);
	}
	return result;
}


// decodes the values followed by a counter
unsigned int rle_decode(unsigned char *values, unsigned int end, unsigned char *aim)
{
	unsigned int aim_pointer = 0;
	const int repeatings = 1;
	
	short counter = 0;
	unsigned char old_value = values[0]-1;
	unsigned int i;
	int n;
	for(i = 0; i < end; i++)
	{
		if(old_value == values[i])
		{
			counter++;
		}
		else
		{
			if(counter > repeatings-1)
			{
				n = values[i];
				while(n >= 0)
				{
					aim[aim_pointer++] = old_value;
					n--;
				}
				old_value = values[i+1]-1;
				counter = 0;
			}
			else
			{
				aim[aim_pointer++] = values[i];
				counter = 0;
				old_value = values[i];
			}
		}
	}
	return aim_pointer;
}


// decodes the dynamic tables
unsigned int decode_dynamic_reordering(unsigned char *values, unsigned int end, unsigned char *aim)
{
	unsigned char shifter = 8;
	unsigned int pointer = 0;
	unsigned int aim_pointer = 0;
	unsigned char optimal_bits = bin_read(values, &pointer, &shifter, 4)+1;
	unsigned char unused_bits_last_byte = bin_read(values, &pointer, &shifter, 4)+1;
	unsigned char elements = bin_read(values, &pointer, &shifter, 8) + 1;
	unsigned int optimal_border = pow(2, optimal_bits);
	unsigned char small_translation[256];
	unsigned char big_translation[256];
	unsigned int i;
	unsigned char normal_bits = get_first_one(elements);
	unsigned char read;
	
	// reads the table with the different values
	for(i = 0; i < optimal_border; i++)
	{
		small_translation[i] = bin_read(values, &pointer, &shifter, 8);
	}
	for(i = 0; i < elements - optimal_border+1; i++)
	{
		big_translation[i] = bin_read(values, &pointer, &shifter, 8);
	}
	
	// retranslate the whole stuff
	do
	{
		if(!bin_read(values, &pointer, &shifter, 1))
		{
			aim[aim_pointer++] = small_translation[bin_read(values, &pointer, &shifter, optimal_bits)];
		}
		else
		{
			aim[aim_pointer++] = big_translation[bin_read(values, &pointer, &shifter, normal_bits)];
		}
	}
	while(pointer < end || unused_bits_last_byte < shifter-1);
	return aim_pointer;
}


// retranslating the static ascii-table
unsigned int decode_reduced_bits(unsigned char *values, unsigned int end, unsigned char *aim, unsigned char *ascii_table)
{
	unsigned char retranslate[256];
	int i;
	for(i = 0; i < 256; i++)
	{
		retranslate[ascii_table[i]] = i;
	}
	unsigned int pointer = 0;
	unsigned char shifter = 8;
	unsigned int aim_pointer = 0;
	unsigned int global_min = bin_read(values, &pointer, &shifter, 8);
	unsigned char global_bits = bin_read(values, &pointer, &shifter, 3) + 1;
	unsigned char unused_bits_last_byte = bin_read(values, &pointer, &shifter, 4)+1;
	unsigned char min;
	unsigned char bits;
	while(pointer < end)
	{
		if(bin_read(values, &pointer, &shifter, 1))
		{
			while(pointer < end)
			{
				aim[aim_pointer] = bin_read(values, &pointer, &shifter, global_bits);
				if(aim[aim_pointer] == 0)
				{
					aim[aim_pointer++] = 0;
					break;
				}
				else
				{
					aim[aim_pointer++] = retranslate[aim[aim_pointer] + global_min];
				}
			}
		}
		else
		{
			min = bin_read(values, &pointer, &shifter, 8)-1;
			bits = bin_read(values, &pointer, &shifter, 3) + 1;
			while(pointer < end)
			{
				aim[aim_pointer] = bin_read(values, &pointer, &shifter, bits);
				if(aim[aim_pointer] == 0)
				{
					aim[aim_pointer++] = 0;
					break;
				}
				else
				{
					aim[aim_pointer++] = retranslate[aim[aim_pointer] + min];
				}
			}
		}
	}
}

int main(int argc, char **args)
{
	unsigned char ascii_table[256];
	unsigned int n = 0;
	unsigned int i;
	for(i = 0; i < 33; i++)
	{
		ascii_table[i] = 0;
	}
	n++;
	for(i = 65; i < 91; i++)
	{
		ascii_table[i] = n++;
	}
	ascii_table[95] = n++;
	for(i = 97; i < 123; i++)
	{
		ascii_table[i] = n++;
	}
	for(i = 48; i < 58; i++)
	{
		ascii_table[i] = n++;
	}
	for(i = 33; i < 48; i++)
	{
		ascii_table[i] = n++;
	}
	for(i = 58; i < 65; i++)
	{
		ascii_table[i] = n++;
	}
	for(i = 91; i < 95; i++)
	{
		ascii_table[i] = n++;
	}
	ascii_table[96] = n++;
	for(i = 123; i < 256; i++)
	{
		ascii_table[i] = n++;
	}
	if(argc < 3)
	{
		printf("Please call this program with the compressed file and the aim-file as second argument\n");
		exit(1);
	}
	FILE *input = fopen(args[1], "rb");
	FILE *output = fopen(args[2], "w");
	if(input == NULL)
	{
		printf("%s not found or has the wrong permissions\n", args[1]);
		exit(2);
	}
	if(output == NULL)
	{
		printf("%s can't be created\n", args[2]);
		exit(2);
	}
	struct stat *filestat = malloc(sizeof(struct stat));
	stat(args[1], filestat);
	int filesize = (int)(filestat->st_size);
	
	unsigned char *bin = malloc(sizeof(unsigned char) * filesize * 20);
	unsigned char *tags = malloc(sizeof(unsigned char) * filesize * 20);
	unsigned char *txt = malloc(sizeof(unsigned char) * filesize * 20);
	unsigned char *tmp_read = malloc(sizeof(unsigned char) * filesize * 20);
	unsigned char **tag_list = malloc(sizeof(unsigned char*)*filesize * 20/4);
	unsigned char **tag_stack = malloc(sizeof(unsigned char*)*filesize * 20/4);
	unsigned int tag_list_pointer = 0;
	
	unsigned int tag_stack_pointer = 0;
	unsigned int txt_pointer = 0;
	unsigned int tags_pointer = 0;
	unsigned int bin_pointer = 0;
	unsigned int tmp_read_pointer = 0;
	
	int read = 0;
	unsigned int delimiter;
	unsigned char status = fgetc(input);
	
	// setting the delimiter
	if(status & (1<<2))
	{
		delimiter = fgetc(input);
	}
	else
	{
		delimiter = 0;
		delimiter |= (fgetc(input) << 8);
		delimiter |= fgetc(input);
	}
	
	// reading the bin-part until we read the delimiter
	while((read = fgetc(input)) != EOF)
	{
		tmp_read[tmp_read_pointer] = read;
		if(tmp_read_pointer != 0)
		{
			if(status & (1<<2))
			{
				if(delimiter == tmp_read[tmp_read_pointer])
				{
					break;
				}
			}
			else
			{
				if(delimiter == ((tmp_read[tmp_read_pointer-1] << 8) | tmp_read[tmp_read_pointer]))
				{
					tmp_read_pointer--;
					break;
				}
			}
		}
		tmp_read_pointer++;
	}
	
	// decode the stuff dependent of the status bits
	if(status & (1<<7))
	{
		bin_pointer = decode_dynamic_reordering(tmp_read, tmp_read_pointer, bin);
		if(status & (1<<6))
		{
			tmp_read_pointer = decode_dynamic_reordering(bin, bin_pointer, tmp_read);
			bin_pointer = rle_decode(tmp_read, tmp_read_pointer, bin);
		}
		else
		{
			tmp_read_pointer = rle_decode(bin, bin_pointer, tmp_read);
			for(bin_pointer = 0; bin_pointer <= tmp_read_pointer; bin_pointer++)
			{
				bin[bin_pointer] = tmp_read[bin_pointer];
			}
		}
	}
	else
	{
		bin_pointer = rle_decode(tmp_read, tmp_read_pointer, bin);
	}
	
	
	// read the tag-part from file
	tmp_read_pointer = 0;
	while((read = fgetc(input)) != EOF)
	{
		tmp_read[tmp_read_pointer] = read;
		if(tmp_read_pointer != 0)
		{
			if(status & (1<<2))
			{
				if(delimiter == tmp_read[tmp_read_pointer])
				{
					break;
				}
			}
			else
			{
				if(delimiter == ((tmp_read[tmp_read_pointer-1] << 8) | tmp_read[tmp_read_pointer]))
				{
					tmp_read_pointer--;
					break;
				}
			}
		}
		tmp_read_pointer++;
	}
	
	// and same again... decode
	
	if(status & (1<<5))
	{
		tags_pointer = decode_dynamic_reordering(tmp_read, tmp_read_pointer, tags);
		for(tmp_read_pointer = 0; tmp_read_pointer <= tags_pointer; tmp_read_pointer++)
		{
			tmp_read[tmp_read_pointer] = tags[tmp_read_pointer];
		}
	}
	
	unsigned char retranslate[256];
	for(i = 0; i < 256; i++)
	{
		retranslate[ascii_table[i]] = i;
	}
	unsigned char shifter = 8;
	unsigned int adress = 0;
	unsigned int pointer = 0;
	unsigned int global_min = bin_read(tmp_read, &pointer, &shifter, 8) - 1;
	unsigned char global_bits = bin_read(tmp_read, &pointer, &shifter, 3) + 1;
	unsigned char unused_bits_last_byte = bin_read(tmp_read, &pointer, &shifter, 4)+1;
	unsigned char min;
	unsigned char bits;
	unsigned int copy;
	tags_pointer = 0;
	tag_list_pointer = 0;
	while(pointer < tmp_read_pointer)
	{
		if(bin_read(tmp_read, &pointer, &shifter, 1))
		{
			// we read an adress.. so copy the stuff from the first occurence
			copy = bin_read(tmp_read, &pointer, &shifter, get_first_one(tag_stack_pointer));
			tag_list[tag_list_pointer++] = &tags[tags_pointer];
			for(i = 0; tag_stack[copy][i] != 0; i++)
			{
				tags[tags_pointer++] = tag_stack[copy][i];
			}
			tags[tags_pointer++] = 0;
		}
		else
		{
			// we read a text-tag
			tag_stack[tag_stack_pointer++] = &tags[tags_pointer];
			tag_list[tag_list_pointer++] = &tags[tags_pointer];
			adress++;
			if(bin_read(tmp_read, &pointer, &shifter, 1))
			{
				while(pointer < tmp_read_pointer)
				{
					tags[tags_pointer] = bin_read(tmp_read, &pointer, &shifter, global_bits);
					if(tags[tags_pointer] == 0)
					{
						tags[tags_pointer++] = 0;
						break;
					}
					else
					{
						tags[tags_pointer++] = retranslate[tags[tags_pointer] + global_min];
					}
				}
			}
			else
			{
				min = bin_read(tmp_read, &pointer, &shifter, 8)-1;
				bits = bin_read(tmp_read, &pointer, &shifter, 3) + 1;
				while(pointer < tmp_read_pointer)
				{
					tags[tags_pointer] = bin_read(tmp_read, &pointer, &shifter, bits);
					if(tags[tags_pointer] == 0)
					{
						tags[tags_pointer++] = 0;
						break;
					}
					else
					{
						tags[tags_pointer++] = retranslate[tags[tags_pointer] + min];
					}
				}
			}
		}
	}
	tags_pointer--;
	
	
	
	
	
	// read the text-part... no delimiter here because it's to the end of the file
	tmp_read_pointer = 0;
	while((read = fgetc(input)) != EOF)
	{
		tmp_read[tmp_read_pointer] = read;
		tmp_read_pointer++;
	}
	
	// static ascii-table
	n = 0;
	n++;
	ascii_table[32] = n++;
	for(i = 65; i < 91; i++)
	{
		ascii_table[i] = n++;
	}
	ascii_table[95] = n++;
	for(i = 97; i < 123; i++)
	{
		ascii_table[i] = n++;
	}
	ascii_table[10] = n++;
	ascii_table[13] = n++;
	for(i = 48; i < 58; i++)
	{
		ascii_table[i] = n++;
	}
	for(i = 33; i < 48; i++)
	{
		ascii_table[i] = n++;
	}
	for(i = 58; i < 65; i++)
	{
		ascii_table[i] = n++;
	}
	for(i = 91; i < 95; i++)
	{
		ascii_table[i] = n++;
	}
	ascii_table[96] = n++;
	for(i = 1; i < 10; i++)
	{
		ascii_table[i] = n++;
	}
	ascii_table[11] = n++;
	ascii_table[12] = n++;
	for(i = 14; i < 32; i++)
	{
		ascii_table[i] = n++;
	}
	for(i = 123; i < 256; i++)
	{
		ascii_table[i] = n++;
	}
	
	// decode the stuff...
	if(status & (1<<4))
	{
		txt_pointer = decode_dynamic_reordering(tmp_read, tmp_read_pointer, txt);
		if(status & (1<<3))
		{
			tmp_read_pointer = decode_dynamic_reordering(txt, txt_pointer, tmp_read);
			for(txt_pointer = 0; txt_pointer <= tmp_read_pointer; txt_pointer++)
			{
				txt[txt_pointer] = tmp_read[txt_pointer];
			}
		}
	}
	else
	{
		txt_pointer = decode_reduced_bits(tmp_read, tmp_read_pointer, txt, ascii_table);
	}
	
	
	tmp_read_pointer = 0;
	shifter = 8;
	pointer = 0;
	int depth = 0;
	int state = bin_read(bin, &pointer, &shifter, 1);
	unsigned int tag_add = 0;
	unsigned int txt_add = 0;
	tag_add = 0;
	tag_stack_pointer = 0;
	
	// build the xml-file through the whole stuff
	while(pointer <= bin_pointer)
	{
		if(bin_read(bin, &pointer, &shifter, 1))
		{
			if(state)
			{
				fputc('<', output);
				for(i = 0; tag_list[tag_add][i] != 0; i++)
				{
					fputc(tag_list[tag_add][i], output);
				}
				fputc('>', output);
				tag_stack[tag_stack_pointer++] = tag_list[tag_add++];
			}
			else
			{
				state = 1;
			}
		}
		else
		{
			if(state)
			{
				while(txt[txt_add] != 0)
				{
					fputc(txt[txt_add++], output);
				}
				txt_add++;
				state = 0;
			}
			else
			{
				if(tag_stack_pointer > 0)
				{
					fputc('<', output);
					fputc('/', output);
					tag_stack_pointer--;
					for(i = 0; tag_stack[tag_stack_pointer][i] != 0 && tag_stack[tag_stack_pointer][i] != ' '; i++)
					{
						fputc(tag_stack[tag_stack_pointer][i], output);
					}
					fputc('>', output);
				}
				else
				{
					break;
				}
			}
		}
	}
	fclose(input);
	fclose(output);
	free(bin);
	free(tags);
	free(txt);
	free(tag_list);
	free(tag_stack);
	free(tmp_read);
	return 0;
}

