Original use case:
A large output file of test data in which, for whatever reason, the data was not aligned row by row. We knew the correct number of columns each row should have had, but in some cases a new row was begun after only a few columns had been populated. We were only interested in cleaning the file, so malformed rows could simply be separated out from the well-formed ones.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
// Counts the tab characters in a NUL-terminated string.
// str: string to scan; it is read up to (not including) the terminating NUL.
// Returns the number of '\t' characters found.
int tabcount(char *str)
{
    int total = 0;

    for (char *cursor = str; *cursor != '\0'; ++cursor) {
        if (*cursor == '\t') {
            total++;
        }
    }
    return total;
}
// Reads the next line of fp into a freshly allocated, NUL-terminated buffer.
// The trailing '\n' is consumed but not stored; a final line that ends at EOF
// without a newline is returned as well.
// *len receives the number of characters stored (excluding the NUL).
// *failed is set to 1 only when memory allocation fails, otherwise to 0.
// Returns NULL when no characters are left to read or when allocation fails.
// The caller owns the returned buffer and must free() it.
static char *read_line(FILE *fp, size_t *len, int *failed)
{
    size_t cap = 128;
    size_t used = 0;
    char *buf;
    int c;

    *len = 0;
    *failed = 0;

    c = fgetc(fp);
    if (c == EOF) {
        return NULL;
    }

    buf = malloc(cap);
    if (buf == NULL) {
        *failed = 1;
        return NULL;
    }

    while (c != EOF && c != '\n') {
        // Always keep one spare byte for the terminating NUL
        if (used + 1 >= cap) {
            char *bigger = realloc(buf, cap * 2);
            if (bigger == NULL) {
                free(buf);
                *failed = 1;
                return NULL;
            }
            buf = bigger;
            cap *= 2;
        }
        buf[used++] = (char)c;
        c = fgetc(fp);
    }
    buf[used] = '\0';
    *len = used;
    return buf;
}

// Writes the len characters of line to out, followed by a single newline.
// Write errors are left on the stream's error indicator for the caller to
// inspect with ferror().
static void write_line(FILE *out, const char *line, size_t len)
{
    fwrite(line, 1, len, out);
    fputc('\n', out);
}

// Splits ex_input.txt into goodfile.txt and badfile.txt.
//
// Usage: ./<program_name> <tab_count> <first_char>   e.g. ./<program_name> 15 -
//
// The first line of the input is the header row and always goes to the good
// file. Every following line goes to the good file when it contains exactly
// <tab_count> tabs AND its first character equals the first character of
// <first_char>; all other lines go to the bad file.
//
// Returns EXIT_SUCCESS when the whole input was processed, EXIT_FAILURE on bad
// arguments, a file that cannot be opened, an I/O error, or out of memory.
int main(int argc, char *argv[])
{
    FILE *fp = NULL;
    FILE *goodfile = NULL;
    FILE *badfile = NULL;
    char *line = NULL;
    char *end = NULL;
    size_t len = 0;
    int failed = 0;
    int rc = EXIT_FAILURE;
    long myTabs;

    if (argc < 3) {
        fprintf(stderr, "usage: %s <tab_count> <first_char>\n",
                argc > 0 ? argv[0] : "program");
        return EXIT_FAILURE;
    }

    // Convert command line argument for desired tabcount, rejecting non-numbers
    myTabs = strtol(argv[1], &end, 10);
    if (end == argv[1] || *end != '\0' || myTabs < 0) {
        fprintf(stderr, "invalid tab count: %s\n", argv[1]);
        return EXIT_FAILURE;
    }

    // Open input file
    fp = fopen("ex_input.txt", "r");
    if (fp == NULL) {
        fprintf(stderr, "file cannot be opened\n");
        goto done;
    }

    // Open the files for good rows and bad rows separately
    goodfile = fopen("goodfile.txt", "w");
    if (goodfile == NULL) {
        fprintf(stderr, "goodfile.txt cannot be opened\n");
        goto done;
    }
    badfile = fopen("badfile.txt", "w");
    if (badfile == NULL) {
        fprintf(stderr, "badfile.txt cannot be opened\n");
        goto done;
    }

    // The first line of the input file is the header row;
    // it will always be in the good file
    line = read_line(fp, &len, &failed);
    if (line != NULL) {
        write_line(goodfile, line, len);
        free(line);

        // Each line is read exactly once, so no seeking back is needed
        while ((line = read_line(fp, &len, &failed)) != NULL) {
            // A *correct* line has the requested amount of tabs and starts
            // with the requested character
            if (tabcount(line) == myTabs && line[0] == *argv[2]) {
                write_line(goodfile, line, len);
            } else {
                write_line(badfile, line, len);
            }
            free(line);
        }
    }

    if (failed) {
        fprintf(stderr, "out of memory\n");
        goto done;
    }
    if (ferror(fp)) {
        fprintf(stderr, "error while reading ex_input.txt\n");
        goto done;
    }
    if (ferror(goodfile) || ferror(badfile)) {
        fprintf(stderr, "error while writing output files\n");
        goto done;
    }
    rc = EXIT_SUCCESS;

done:
    // fclose(NULL) is undefined, so only close what was actually opened
    if (fp != NULL) {
        fclose(fp);
    }
    // fclose flushes buffered output, so a failure here means lost data
    if (goodfile != NULL && fclose(goodfile) != 0) {
        rc = EXIT_FAILURE;
    }
    if (badfile != NULL && fclose(badfile) != 0) {
        rc = EXIT_FAILURE;
    }
    return rc;
}