Trigrams

February 18, 2023

Split a string into trigrams for text matching purposes.

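The input is a string and the return value is an array of strings: one trigram per input character, with the first and last trigrams padded by a space. An input of three characters or fewer is returned unchanged as the array's single entry.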

/* Author: Jeremy Heyno
 * Licensed under GPLv3 */


# include <stdio.h>
# include <stdlib.h>
# include <string.h>

/*
 * Split `string` into space-padded trigrams.
 *
 * For an input longer than three characters the result holds strlen(string)
 * entries: the first is a space followed by the first two characters, the
 * last is the final two characters followed by a space, and every entry in
 * between is a three-character window sliding over the input one character
 * at a time.
 * For an input of three characters or fewer the result holds exactly one
 * entry: a copy of the input itself.
 *
 * Returns a heap-allocated array of heap-allocated, NUL-terminated strings.
 * The caller owns the result and must free() every entry and then the array.
 * Returns NULL if `string` is NULL or if any allocation fails; in that case
 * everything allocated so far has already been released.
 */
char **return_trigrams(const char *string)
{
	enum { TRIGRAM_LEN = 3 };

	if (string == NULL) {
		return NULL;
	}

	const size_t length = strlen(string);
	/* Short inputs produce a single entry that holds the input unchanged. */
	const size_t count = (length > TRIGRAM_LEN) ? length : 1;

	char **trigram_strings = calloc(count, sizeof *trigram_strings);
	if (trigram_strings == NULL) {
		return NULL;
	}

	for (size_t i = 0; i < count; i++) {
		/* calloc zero-fills, so each entry is NUL-terminated from the start. */
		char *entry = calloc(TRIGRAM_LEN + 1, sizeof *entry);
		if (entry == NULL) {
			/* Unwind the entries built so far so nothing leaks. */
			while (i > 0) {
				free(trigram_strings[--i]);
			}
			free(trigram_strings);
			return NULL;
		}
		trigram_strings[i] = entry;

		if (length <= TRIGRAM_LEN) {
			/* Single-entry case: copy the whole (short) input. */
			memcpy(entry, string, length);
		} else if (i == 0) {
			/* Leading trigram: pad on the left. */
			entry[0] = ' ';
			memcpy(entry + 1, string, TRIGRAM_LEN - 1);
		} else if (i == count - 1) {
			/* Trailing trigram: pad on the right. */
			memcpy(entry, string + length - (TRIGRAM_LEN - 1), TRIGRAM_LEN - 1);
			entry[TRIGRAM_LEN - 1] = ' ';
		} else {
			/* Entry i covers input characters i-1 .. i+1. */
			memcpy(entry, string + (i - 1), TRIGRAM_LEN);
		}
	}

	return trigram_strings;
}


/*
 * Demo driver: builds the trigrams of a sample string, prints each one on
 * its own line, and releases every entry followed by the array itself.
 */
int main(void){

	const char *mystring = "stringgg";
	const size_t length = strlen(mystring);
	/* return_trigrams() yields one entry per character for inputs longer
	 * than three characters, and a single entry (the input) otherwise. */
	const size_t count = (length > 3) ? length : 1;

	char **string = return_trigrams(mystring);
	if (string == NULL)
	{
		/* The array allocation failed; there is nothing to print or free. */
		fprintf(stderr, "failed to allocate trigrams\n");
		return EXIT_FAILURE;
	}

	for (size_t i = 0; i < count; i++)
	{
		printf("%s\n", string[i]);
		free(string[i]);
	}
	free(string);

	return EXIT_SUCCESS;
}