/*
 *    XML-comprifex is a small and fast programm for compressing XML-files.
 *    Copyright (C) 2009  Markus Pargmann (http://scosu.org)
 *
 *    This program is free software: you can redistribute it and/or modify
 *    it under the terms of the GNU General Public License as published by
 *    the Free Software Foundation, either version 3 of the License, or
 *    (at your option) any later version.
 *
 *    This program is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *    GNU General Public License for more details.
 *
 *    You should have received a copy of the GNU General Public License
 *    along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */






#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>
#include <math.h>
#include <string.h>


// states of the parsing machine
enum STATES
{
	START=0,
	TAG_START,
	CLOSING_TAG,
	OPENING_TAG
};


// appends up to 8 bit to a given array
inline void bin_append(unsigned char *array, int *pointer, unsigned char *shifter, unsigned char value, unsigned char length)
{
	if(*shifter >= length)
	{
		*shifter -= length;
		array[*pointer] |= value << (*shifter);
	}
	else
	{
		length -= *shifter;
		array[(*pointer)++] |= value >> length;
		array[*pointer] = 0;
		*shifter = 8-length;
		array[*pointer] |= value << (*shifter);
	}
}


// searches for the first bit in a int and returns the position
inline unsigned char get_first_one(unsigned int value)
{
	int j;
	for(j = sizeof(unsigned int)*8-1; j != -1; j--)
	{
		if(value & (1<<j))
		{
			break;
		}
	}
	return j+1;
}


// generating a dynamic table ordered by the most occurence of the different byte-values
inline int dynamic_reordering_alphabet(unsigned char *values, unsigned int end, unsigned char *aim)
{
	aim[0] = 0;
	unsigned int sums[256];
	unsigned char reverse[256];
	int cur = 0;
	
	// initializing sums(how often every value occures in the value-array) and reverse for the later sort
	for(cur = 0; cur < 256; cur++)
	{
		reverse[cur] = cur;
		sums[cur] = 0;
	}
	
	// counting the values in the array
	for(cur = 0; cur < end; cur++)
	{
		sums[values[cur]]++;
	}
	unsigned int elements = 0;
	unsigned int n;
	unsigned char tmp;
	
	
	// min-heap-sort
	unsigned char heapsize = 255;
	unsigned short l;
	unsigned short r;
	short largest;
	unsigned short next;
	
	// build-min-heap
	for(cur = 127; cur >= 0; cur--)
	{
		
		// min-heapify
		next = cur;
		while(1)
		{
			l = 2*(next+1)-1;
			r = l+1;
			largest = next;
			if(l < 256 && sums[reverse[l]] < sums[reverse[largest]])
			{
				largest = l;
			}
			if(r < 256 && sums[reverse[r]] < sums[reverse[largest]])
			{
				largest = r;
			}
			if(largest == next)
			{
				break;
			}
			tmp = reverse[next];
			reverse[next] = reverse[largest];
			reverse[largest] = tmp;
			next = largest;
		}
	}
	
	
	
	for(cur = 255; cur != 0; cur--)
	{
		if(sums[reverse[0]] != 0)
		{
			elements++;
		}
		
		// swap first and last heap-element
		tmp = reverse[0];
		reverse[0] = reverse[cur];
		reverse[cur] = tmp;
		
		// min-heapify
		next = 0;
		while(1)
		{
			l = 2*(next+1)-1;
			r = l+1;
			largest = next;
			if(l < cur && sums[reverse[l]] < sums[reverse[largest]])
			{
				largest = l;
			}
			if(r < cur && sums[reverse[r]] < sums[reverse[largest]])
			{
				largest = r;
			}
			if(largest == next)
			{
				break;
			}
			tmp = reverse[next];
			reverse[next] = reverse[largest];
			reverse[largest] = tmp;
			next = largest;
		}
	}
	unsigned char normal_length = get_first_one(elements);
	unsigned long sum;
	unsigned int max = 2;
	unsigned long best_val = -1;
	unsigned char best_max = 2;
	
	
	// calculate the best compression
	for(cur = 1; cur < 9; cur++)
	{
		sum = 0;
		for(n = 0; n < 256; n++)
		{
			if(n < max)
			{
				sum += sums[reverse[n]] * (cur+1);
			}
			else
			{
				sum += sums[reverse[n]] * (normal_length+1);
			}
		}
		if(best_val > sum)
		{
			best_val = sum;
			best_max = cur;
		}
		max *= 2;
	}
	
	
	// don't compress if the best value is bigger than normal
	if(end > best_val/8 + elements + 1)
	{
		unsigned char retranslate[256];
		unsigned char aim_shifter = 8;
		best_val = pow(2, best_max);
		cur = 0;
		
		// write some header-information
		bin_append(aim, &cur, &aim_shifter, best_max-1, 4);
		
		// empty room for the free bits at the end of all
		bin_append(aim, &cur, &aim_shifter, 0,4);
		bin_append(aim, &cur, &aim_shifter, elements-1, 8);
		
		// write the short values and create the translation-table
		for(n = 0; n < best_val; n++)
		{
			bin_append(aim, &cur, &aim_shifter, reverse[n], 8);
			retranslate[reverse[n]] = n;

		}
		
		// write the long values and create the translation-table
		for(n = best_val; n <= elements; n++)
		{
			bin_append(aim, &cur, &aim_shifter, reverse[n], 8);
			retranslate[reverse[n]] = n;
		}
		
		// translate the values and shorten the length
		for(n = 0; n < end; n++)
		{
			
			// is this one of the small values
			if(retranslate[values[n]] < best_val)
			{

				bin_append(aim, &cur, &aim_shifter, 0, 1);
				bin_append(aim, &cur, &aim_shifter, retranslate[values[n]], best_max);
			}
			else
			{
				bin_append(aim, &cur, &aim_shifter, 1, 1);
				bin_append(aim, &cur, &aim_shifter, retranslate[values[n]] - best_val, normal_length);
			}
		}
		
		// insert the not used bits at the beginning in the empty room
		aim[0] |= (aim_shifter == 0 ? 7 : aim_shifter-1);
		return cur+1;
	}
	return -1;
}


// count same values and replace them
inline unsigned int rle_encode(unsigned char *values, unsigned int end, unsigned char *aim, unsigned int cur)
{
	// how often the value has to occure before awaiting a counter
	const int repeatings = 1;
	unsigned int i;
	
	// to avoid a match for the first loop
	unsigned char old_value = values[0]-1;
	unsigned short counter = 0;
	for(i = 0; i != end; i++)
	{
		if(old_value == values[i])
		{
			old_value = values[i];
			if(counter++ == 255 + repeatings-1)
			{
				aim[cur++] = 255;
				counter = 0;
				old_value--;
			}
			else if(counter <= repeatings)
			{
				aim[cur++] = values[i];
			}
		}
		else
		{
			if(counter > repeatings-1)
			{
				aim[cur++] = counter-repeatings;
			}
			counter = 0;
			old_value = values[i];
			aim[cur++] = values[i];
		}
	}

	return cur;
}


// reduce the used bits per character by translating with the static ascii-table
inline unsigned int reduce_used_bits(unsigned char *values, unsigned int end, unsigned char *ascii_table, unsigned char *aim)
{
	unsigned char global_bits = 0;
	unsigned char global_min = 255;
	unsigned long global_max = 0;
	unsigned long max = 0;
	unsigned char min;
	unsigned long elements = 0;
	unsigned int i;
	unsigned int aim_pointer = 0;
	unsigned char aim_shifter = 8;
	aim[0] = 0;
	
	// finding the global minimal value and the maximum (not total maxmimum)
	for(i = 0; i < end; i++)
	{
		if(values[i] == 0)
		{
			global_max += max;
			max = 0;
			elements++;
			continue;
		}
		if(ascii_table[values[i]] < global_min)
			global_min = ascii_table[values[i]];
		if(ascii_table[values[i]] > max)
		{
			max = ascii_table[values[i]];
		}
	}
	
	// calculating the best values for min and max
	global_max = (double)global_max/(double)elements;
	global_bits = get_first_one(global_max-global_min+1);
	global_min--;
	
	// writing this stuff to the header
	bin_append(aim, &aim_pointer, &aim_shifter, global_min, 8);
	bin_append(aim, &aim_pointer, &aim_shifter, global_bits, 3);
	bin_append(aim, &aim_pointer, &aim_shifter, 0, 4);
	global_bits++;
	unsigned char already_wrote = 0;
	unsigned int n;
	
	// translate the characters and write them
	for(i = 0; i < end; i++)
	{
		if(already_wrote)
		{
			if(values[i] == 0)
			{
				already_wrote = 0;
				continue;
			}
			continue;
		}
		max = 0;
		min = 255;
		
		// find out if the maximum is matching
		for(n = i; n < end && values[n] != 0; n++)
		{
			if(ascii_table[values[n]] < min)
			{
				min = ascii_table[values[n]];
			}
			if(ascii_table[values[n]] > max)
			{
				max = ascii_table[values[n]];
			}
		}
		
		// else write a short header for the new character array
		if(max > global_max && get_first_one(max-global_min)+1 > global_bits)
		{
			max = get_first_one(max-min+1)-1;
			bin_append(aim, &aim_pointer, &aim_shifter, 0, 1);
			bin_append(aim, &aim_pointer, &aim_shifter, min, 8);
			bin_append(aim, &aim_pointer, &aim_shifter, max, 3);
			min--;
			max++;
		}
		else
		{
			bin_append(aim, &aim_pointer, &aim_shifter, 1, 1);
			min = global_min;
			max = global_bits;
		}
		for(n = i; n < end && values[n] != 0; n++)
		{
			bin_append(aim, &aim_pointer, &aim_shifter, ascii_table[values[n]] - min, max);
		}
		bin_append(aim, &aim_pointer, &aim_shifter, 0, max);
		already_wrote = 1;
	}
	
	// write the free bits left to the header
	aim[1] |= ((aim_shifter == 0 ? 7 : aim_shifter-1) << 1);
	return aim_pointer+1;
}

int main(int argc, char **args)
{
	// reading the filesize
	struct stat *filestat = malloc(sizeof(struct stat));
	stat(args[1], filestat);
	int filesize = (int)(filestat->st_size);
	
	
	// allocating the memory for the 3 parts of the final file: bin, tags, txt
	unsigned char *txt = malloc(sizeof(unsigned char)*filesize+10);
	unsigned char *tags = malloc(sizeof(unsigned char)*filesize/2);
	unsigned char *bin = malloc(sizeof(unsigned char)*filesize/2);

	
	// pointer for the arrays above
	int txt_pointer = 0;
	int tags_pointer = 0;
	int bin_pointer = 0;
	unsigned char bin_shifter = 8;
	
	// allocating a tag-list with pointers to the start of every tag
	unsigned char **tag_list = malloc(sizeof(unsigned char*)*filesize/4);
	
	int tag_list_pointer = 0;
	unsigned char *final_result = malloc(sizeof(unsigned char)*filesize*2);
	unsigned int final_pointer = 0;
	// allocating a global memory area to use it as space for compressing-algorithms
	unsigned char *compress = malloc(sizeof(unsigned char)*filesize*2);
	
	int compress_pointer = 0;
	unsigned char compress_shifter = 8;
	unsigned char status = 0;
	
	unsigned int tag_part;
	unsigned int bin_part;
	
	// initializing some count-vars
	int i;
	int j;
	int n;
	
	
	
	// opening read-file and write-file
	if(argc < 3)
	{
		printf("Please give the XML-File as first and the aim-file as second argument\n");
		exit(1);
	}
	FILE *xml_file = fopen(args[1], "rb");
	FILE *cmp_file = fopen(args[2], "w");
	FILE *tmp_file;

	if(xml_file == NULL)
	{
		printf("%s doesn't exist or has the wrong permissions\n", args[1]);
		exit(2);
	}
	if(cmp_file == NULL)
	{
		printf("%s can't be created\n", args[2]);
		exit(2);
	}
	
	// setting the first element of the bin-array. all the other array-elements will be initialized automatically
	bin[0] = 0;
	
	
	// read data
	int data;
	
	
	// xml-parser as state-machine
	int state = START;
	
	// already wrote a delimiter between tags/txt
	char wrote_null = 1;
	char wrote_data = 0;
	
	
	
	while((data = fgetc(xml_file)) != EOF)
	{
		switch(state)
		{
			case START:
				if(data == '<')
				{
					state = TAG_START;
					continue;
				}
				txt[txt_pointer++] = data;
				wrote_null = 0;
				if(wrote_data == 0)
				{
					wrote_data = 1;
					bin_append(bin, &bin_pointer, &bin_shifter, 2, 2);
				}
				break;
			case TAG_START:
				if(data == '/')
				{
					state = CLOSING_TAG;
					bin_append(bin, &bin_pointer, &bin_shifter, 0, 1);
					wrote_data = 0;
					continue;
				}
				else if(data == '?' || data == '!')
				{
					state = START;
					txt[txt_pointer++] = '<';
					txt[txt_pointer++] = data;
					if(wrote_data == 0)
					{
						wrote_data = 1;
						bin_append(bin, &bin_pointer, &bin_shifter, 2, 2);
					}
					wrote_null = 0;
					continue;
				}
				else
				{
					state = OPENING_TAG;
					tags[tags_pointer++] = data;
					tag_list[tag_list_pointer++] = &tags[tags_pointer-1];
					
				}
				break;
			case OPENING_TAG:
				if(data == '>')
				{
					if(tags[tags_pointer-1] == '/')
					{
						if(wrote_data == 0)
						{
							wrote_data = 1;
							bin_append(bin, &bin_pointer, &bin_shifter, 2, 2);
						}
						wrote_null = 0;
						txt[txt_pointer++] = '<';
						for(i = tags_pointer-1; tags[i] != 0; i--)
						{
						}
						i++;
						for(n = i; n < tags_pointer; n++)
						{
							txt[txt_pointer++] = tags[n];
						}
						txt[txt_pointer++] = '>';
						
						tags_pointer = i;
						tag_list_pointer--;
					}
					else
					{
						wrote_data = 0;
						
						tags[tags_pointer++] = 0;
						bin_append(bin, &bin_pointer, &bin_shifter, 1, 1);
						if(!wrote_null)
						{
							txt[txt_pointer++] = 0;
							wrote_null = 1;
						}
					}
					state = START;
					continue;
				}
				tags[tags_pointer++] = data;
				break;
			case CLOSING_TAG:
				if(data == '>')
				{
					state = START;
					if(!wrote_null)
					{
						txt[txt_pointer++] = 0;
						wrote_null = 1;
					}
					continue;
				}
				break;
		}
	}
	// adding another closing tag to notify the decoder about the end of the binary-part
	bin_append(bin, &bin_pointer, &bin_shifter, 0, 1);
	
	
	
	
	unsigned char min = 0;
	unsigned char max = 0;
	bin_pointer++;
	compress_pointer = rle_encode(bin, bin_pointer, compress, 0);
	
	// try the dynamic encoding
	bin_pointer = dynamic_reordering_alphabet(compress, compress_pointer, bin);
	if(bin_pointer != -1)
	{
		// write a status bit
		status |= 1 << 7;
		compress_pointer = dynamic_reordering_alphabet(bin, bin_pointer, compress);
		if(compress_pointer == -1)
		{
			goto bin_write_bin;
		}
		else
		{
			// another status bit
			status |= 1 << 6;
			goto bin_write_compress;
		}
	}
	else	// if the dynamic encoding wouldn't make it better
	{
		goto bin_write_compress;
	}
	
	
bin_write_compress:
//	tmp_file = fopen("bin.cmp", "w");
	// writing the bin-stuff to file
	for(i = 0; i != compress_pointer; i++)
	{
		final_result[final_pointer++] = compress[i];
//		fputc(compress[i], tmp_file);
	}
//	fclose(tmp_file);
	goto bin_end;
	
bin_write_bin:
	//dynamic_reordering_alphabet(compress, compress_pointer);
//	tmp_file = fopen("bin.cmp", "w");
	// writing the bin-stuff to file
	for(i = 0; i != bin_pointer; i++)
	{
		final_result[final_pointer++] = bin[i];
//		fputc(bin[i], tmp_file);
	}
//	fclose(tmp_file);
	
bin_end:
	// storing the pointer to the end of the bin-part
	bin_part = final_pointer;
	

	
	
	
	
	
	// constructing a translation-table for smaller differences between several values in the tag (characters)
	unsigned char ascii_table[255];
	n = 0;
	for(i = 0; i < 33; i++)
	{
		ascii_table[i] = 0;
	}
	n++;
	for(i = 65; i < 91; i++)
	{
		ascii_table[i] = n++;
	}
	ascii_table[95] = n++;
	for(i = 97; i < 123; i++)
	{
		ascii_table[i] = n++;
	}
	for(i = 48; i < 58; i++)
	{
		ascii_table[i] = n++;
	}
	for(i = 33; i < 48; i++)
	{
		ascii_table[i] = n++;
	}
	for(i = 58; i < 65; i++)
	{
		ascii_table[i] = n++;
	}
	for(i = 91; i < 95; i++)
	{
		ascii_table[i] = n++;
	}
	ascii_table[96] = n++;
	for(i = 123; i < 256; i++)
	{
		ascii_table[i] = n++;
	}
	
	
	
	unsigned char global_bits = 0;
	unsigned char global_min = 255;
	unsigned long global_max = 0;
	
	// detecting the total-min and average-max of all tag-values
	for(i = 0; i < tag_list_pointer-1; i++)
	{
		min = 255;
		max = 0;
		for(j = 0; tag_list[i][j] != 0; j++)
		{
			if(ascii_table[tag_list[i][j]] < min)
			{
				min = ascii_table[tag_list[i][j]];
			}
			if(ascii_table[tag_list[i][j]] > max)
			{
				max = ascii_table[tag_list[i][j]];
			}
		}
		global_max += max;
		if(global_min > min)
		{
			global_min = min;
		}
	}
	compress[0] = 0;
	compress_pointer = 0;
	compress_shifter = 8;
	global_max = (double)global_max/(double)(tag_list_pointer-1);
	global_bits = get_first_one(global_max - global_min + 1);
	
	// some header information
	bin_append(compress, &compress_pointer, &compress_shifter, global_min, 8);
	bin_append(compress, &compress_pointer, &compress_shifter, global_bits, 3);
	bin_append(compress, &compress_pointer, &compress_shifter, 0, 4);
	global_bits++;
	global_min--;
	
	
	
	// store the adress of the matching tag in the array to handle this if we reach this tag
	int *already_matched = malloc(sizeof(int)*filesize/4);
	for(i = 0; i < filesize/4; i++)
	{
		already_matched[i] = -1;
	}
	
	int match = 0;
	int adress = 0;
	
	for(i = 0; i < tag_list_pointer; i++)
	{
		
		// if this tag is not a new one we can write the adress of the first occurence of the tag
		if(already_matched[i] != -1)
		{
			// this is an adress => 1
			bin_append(compress, &compress_pointer, &compress_shifter, 1,1);
			
			// write the adress. the length is dependent of the already known tags
			bin_append(compress, &compress_pointer, &compress_shifter, already_matched[i], get_first_one(adress));
			continue;
		}
		min = 255;
		max = 0;
		
		// check if the global min/max work for this element too
		for(j = 0; tag_list[i][j] != 0; j++)
		{
			if(ascii_table[tag_list[i][j]] < min)
			{
				min = ascii_table[tag_list[i][j]];
			}
			if(ascii_table[tag_list[i][j]] > max)
			{
				max = ascii_table[tag_list[i][j]];
			}
		}
		
		// write this 0 for the 
		bin_append(compress, &compress_pointer, &compress_shifter, 0, 1);
		if(max > global_max && get_first_one(max-global_min+1)+1 > global_bits)
		{
			max = get_first_one(max-min+1)-1;
			
			// 0 for a new tag
			bin_append(compress, &compress_pointer, &compress_shifter, 0, 1);
			
			// the new min and max=bits needed
			bin_append(compress, &compress_pointer, &compress_shifter, min, 8);
			bin_append(compress, &compress_pointer, &compress_shifter, max, 3);
			min--;
			max++;
		}
		else
		{
			bin_append(compress, &compress_pointer, &compress_shifter, 1, 1);
			min = global_min;
			max = global_bits;
		}
		
		// write the complete element
		for(j = 0; tag_list[i][j] != 0; j++)
		{
			bin_append(compress, &compress_pointer, &compress_shifter, ascii_table[tag_list[i][j]] - min, max);
		}
		bin_append(compress, &compress_pointer, &compress_shifter, 0, max);
		
		// search for next elements that are exactly equal and store the adress to this tag
		for(n = i+1; n < tag_list_pointer; n++)
		{
			if(strcmp(tag_list[i], tag_list[n]) == 0)
			{
				already_matched[n] = adress;
				match++;
			}
		}
		adress++;
	}
	compress_pointer++;
	compress[1] |= ((compress_shifter == 0?7:(compress_shifter-1)) << 1);
	tags_pointer = dynamic_reordering_alphabet(compress, compress_pointer, tags);
	if(tags_pointer == -1)
	{
		goto tags_write_compress;
	}
	else
	{
		status |= 1 << 5;
		goto tags_write_tags;
	}

tags_write_compress:
//	tmp_file = fopen("tags.cmp", "w");
	for(i = 0; i < compress_pointer; i++)
	{
		final_result[final_pointer++] = compress[i];
//		fputc(compress[i], tmp_file);
	}
//	fclose(tmp_file);
	goto tags_end;


tags_write_tags:
//	tmp_file = fopen("tags.cmp", "w");
	for(i = 0; i < tags_pointer; i++)
	{
		final_result[final_pointer++] = tags[i];
//		fputc(tags[i], tmp_file);
	}
//	fclose(tmp_file);

	
tags_end:
	// storing the pointer to the end of the tag-part
	tag_part = final_pointer;
	
	// create a new ascii-table for the txt-part
	n = 0;
	n++;
	ascii_table[32] = n++;
	for(i = 65; i < 91; i++)
	{
		ascii_table[i] = n++;
	}
	ascii_table[95] = n++;
	for(i = 97; i < 123; i++)
	{
		ascii_table[i] = n++;
	}
	ascii_table[10] = n++;
	ascii_table[13] = n++;
	for(i = 48; i < 58; i++)
	{
		ascii_table[i] = n++;
	}
	for(i = 33; i < 48; i++)
	{
		ascii_table[i] = n++;
	}
	for(i = 58; i < 65; i++)
	{
		ascii_table[i] = n++;
	}
	for(i = 91; i < 95; i++)
	{
		ascii_table[i] = n++;
	}
	ascii_table[96] = n++;
	for(i = 1; i < 10; i++)
	{
		ascii_table[i] = n++;
	}
	ascii_table[11] = n++;
	ascii_table[12] = n++;
	for(i = 14; i < 32; i++)
	{
		ascii_table[i] = n++;
	}
	for(i = 123; i < 256; i++)
	{
		ascii_table[i] = n++;
	}
	compress_pointer = dynamic_reordering_alphabet(txt, txt_pointer, compress);
	
	if(compress_pointer == -1)
	{
		compress_pointer = reduce_used_bits(txt, txt_pointer, ascii_table, compress);
		goto txt_write_compress;
	}
	status |= 1 << 4;
	txt_pointer = dynamic_reordering_alphabet(compress, compress_pointer, txt);
	if(txt_pointer != -1)
	{
		status |= 1 << 3;
		goto txt_write_txt;
	}
	


txt_write_compress:
//	tmp_file = fopen("txt.cmp", "w");
	for(i = 0; i < compress_pointer; i++)
	{
		final_result[final_pointer++] = compress[i];
//		fputc(compress[i], tmp_file);
	}
//	fclose(tmp_file);
	goto txt_end;
	
txt_write_txt:
	tmp_file = fopen("txt.cmp", "w");
	for(i = 0; i < txt_pointer; i++)
	{
		final_result[final_pointer++] = txt[i];
		fputc(txt[i], tmp_file);
	}
	fclose(tmp_file);
	
txt_end:
	;	// a label is not allowed to be before a initialization... so i inserted a empty command
	
	
	// the whole stuff should find a free one or two byte long seperator between the different parts
	// the problem is, that you need something else than a 0-byte because the compression uses 0-bytes too
	unsigned char there[65536];
	
	// searching for one byte delimiters
	for(i = 0; i < 256; i++)
	{
		there[i] = 0;
	}
	for(i = 0; i < final_pointer; i++)
	{
		there[final_result[i]] = 1;
	}
	for(i = 0; i < 256; i++)
	{
		if(there[i] == 0)
		{
			break;
		}
	}
	
	// if we can't find a value for one byte, we have to search in 2-byte-values
	if(i == 256)
	{
		for(i = 0; i < 65536; i++)
		{
			there[i] = 0;
		}
		for(i = 1; i < final_pointer; i++)
		{
			there[(final_result[i-1] << 8) | final_result[i]] = 1;
		}
		for(i = 0; i < 65536; i++)
		{
			if(there[i] == 0)
			{
				break;
			}
		}
		filesize = i; // just a tmp-var... don't care of the name
		
		// write all the stuff to the file
		fputc(status, cmp_file);
		fputc(filesize >> 8, cmp_file);
		fputc(filesize, cmp_file);
		for(i = 0; i < bin_part; i++)
		{
			fputc(final_result[i], cmp_file);
		}
		fputc(filesize >> 8, cmp_file);
		fputc(filesize, cmp_file);
		for(i = bin_part; i < tag_part; i++)
		{
			fputc(final_result[i], cmp_file);
		}
		fputc(filesize >> 8, cmp_file);
		fputc(filesize, cmp_file);
		for(i = tag_part; i < final_pointer; i++)
		{
			fputc(final_result[i], cmp_file);
		}
	}
	else
	{
		// write a status bit for the length of the delimiter and of course the other stuff to the file
		status |= (1 << 2);
		fputc(status, cmp_file);
		filesize = i;
		fputc(filesize, cmp_file);
		for(i = 0; i < bin_part; i++)
		{
			fputc(final_result[i], cmp_file);
		}
		fputc(filesize, cmp_file);
		for(i = bin_part; i < tag_part; i++)
		{
			fputc(final_result[i], cmp_file);
		}
		fputc(filesize, cmp_file);
		for(i = tag_part; i < final_pointer; i++)
		{
			fputc(final_result[i], cmp_file);
		}
	}
	fclose(cmp_file);
	
	// some freeing, i think the most OS's don't care about that
	
	free(bin);
	free(tags);
	free(tag_list);
	free(txt);
	free(compress);
	free(final_result);
	return 0;
}
