OAIP/lab26/TextProcessingFile/TextProcessingFile.c

259 lines
7.3 KiB
C
Raw Normal View History

2024-12-06 15:17:07 +04:00
#define _CRT_SECURE_NO_WARNINGS
#include <string.h>
#include <time.h>
#include <stdio.h>
#include <Windows.h>
// <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD> <20> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
#define MAX_LEN_WORD 128
int getNextDelim(FILE* fp, char token[]);
int getNextWord(FILE* fp, char token[], int maxLen);
int TextProcessing(char* filenameIn, char* filenameOut);
char filenameDict[MAX_PATH] = "../Dictionaries/dict0.txt";
char filenameIn[MAX_PATH] = "../Texts/Alice.txt";
char filenameOut[MAX_PATH] = "out/Alice_out.html";
//char filenameIn[] = "c:\\Temp\\FIST2024\\TextMarkup\\Tolkien2.txt";
//char filenameOut[] = "c:\\Temp\\FIST2024\\TextMarkup\\Tolkien2_out.html";
void test(char filenameDict[], char filenameIn[], char filenameOut[]) {
// <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
2024-12-06 18:45:55 +04:00
printf("HTML file %s\nis created from text file %s\nwith highlighting words from %s\ndictionary\n\n",
2024-12-06 15:17:07 +04:00
filenameOut, filenameIn, filenameDict);
// t0 - <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD> <20> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> main()
long t0 = clock();
printf("t0 = %.3f sec \n", t0 / (float)CLOCKS_PER_SEC);
// t1 - <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
int t1 = clock();
printf("t1 = %.3f sec \n", t1 / (float)CLOCKS_PER_SEC);
TextProcessing(filenameIn, filenameOut);
// t2 - <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
long t2 = clock();
printf("t2 = %.3f sec \n", t2 / (float)CLOCKS_PER_SEC);
// t3 - <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
long t3 = clock();
printf("t3 = %.3f sec \n", t3 / (float)CLOCKS_PER_SEC);
printf("t1 - t0 = %.3f sec (Run time of dictionary loading)\n", (t1 - t0) / (float)CLOCKS_PER_SEC);
printf("t2 - t1 = %.3f sec (Run time of HTML generating)\n", (t2 - t1) / (float)CLOCKS_PER_SEC);
2024-12-06 18:45:55 +04:00
printf("t3 - t2 = %.3f sec (Run time of dictionary destroying )\n\n\n", (t2 - t1) / (float)CLOCKS_PER_SEC);
2024-12-06 15:17:07 +04:00
}
int main() {
//char filenameDict[MAX_PATH] = "../Dictionaries/dict0.txt";
//char filenameIn[MAX_PATH] = "../Texts/Alice.txt";
//char filenameOut[MAX_PATH] = "out/Alice_out.html";
2024-12-06 18:45:55 +04:00
for (int i = 0; i < 1; i++) {
/*sprintf(filenameDict, "../Dictionaries/dict%d.txt", i);
2024-12-06 15:17:07 +04:00
test(filenameDict, filenameIn, filenameOut);
sprintf(filenameDict, "../Dictionaries/dict%da.txt", i);
test(filenameDict, filenameIn, filenameOut);
sprintf(filenameDict, "../Dictionaries/dict%db.txt", i);
2024-12-06 18:45:55 +04:00
test(filenameDict, filenameIn, filenameOut);*/
strcpy(filenameIn, "../Texts/Tolkien.txt");
strcpy(filenameOut, "out/Tolkien_out.html");
sprintf(filenameDict, "../Dictionaries/dict0.txt");
test(filenameDict, filenameIn, filenameOut);
sprintf(filenameDict, "../Dictionaries/dict0a.txt");
test(filenameDict, filenameIn, filenameOut);
sprintf(filenameDict, "../Dictionaries/dict0b.txt");
test(filenameDict, filenameIn, filenameOut);
strcpy(filenameIn, "../Texts/Tolkien2.txt");
strcpy(filenameOut, "out/Tolkien2_out.html");
sprintf(filenameDict, "../Dictionaries/dict0.txt");
test(filenameDict, filenameIn, filenameOut);
sprintf(filenameDict, "../Dictionaries/dict0a.txt");
2024-12-06 15:17:07 +04:00
test(filenameDict, filenameIn, filenameOut);
2024-12-06 18:45:55 +04:00
sprintf(filenameDict, "../Dictionaries/dict0b.txt");
test(filenameDict, filenameIn, filenameOut);
2024-12-06 15:17:07 +04:00
}
return 0;
}
// <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>, <20><><EFBFBD><EFBFBD> <20><> <20><><EFBFBD><EFBFBD><EFBFBD> word <20> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>, <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20> <20><><EFBFBD><EFBFBD><EFBFBD> filenameDict
int Member(char* word) {
// <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD>
FILE* fin = fopen(filenameDict, "rt");
if (fin == NULL) {
// <20><><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD> <20><> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> - <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><> <20><><EFBFBD><EFBFBD>
printf("File %s doesn't opened!\n", filenameDict);
return 0;
}
char token[MAX_LEN_WORD];
// <20><><EFBFBD><EFBFBD> <20><> <20><><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD>
while (!feof(fin)) {
// <20><><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> - <20><><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD>
while (getNextDelim(fin, token)) {
}
// <20><><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD> - <20><><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD>
if (getNextWord(fin, token, MAX_LEN_WORD)) {
if (strcmp(token, word) == 0) {
// <20><><EFBFBD><EFBFBD><EFBFBD> <20> <20><><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD>!
fclose(fin);
return 1;
}
}
}
// <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD> <20> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
fclose(fin);
return 0;
}
int TextProcessing(char* filenameIn, char* filenameOut) {
// <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD>
FILE* fin = fopen(filenameIn, "rt");
if (fin == NULL) {
// <20><><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD> <20><> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> - <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><> <20><><EFBFBD><EFBFBD>
printf("File %s doesn't opened!\n", filenameIn);
return 0;
}
// <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD>
FILE* fout = fopen(filenameOut, "wt");
if (fout == NULL) {
// <20><><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD> <20><> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> - <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><> <20><><EFBFBD><EFBFBD>
printf("File %s doesn't opened!\n", filenameOut);
fclose(fin);
return 0;
}
// <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> HTML <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
fprintf(fout, "<!DOCTYPE html>");
fprintf(fout, "<html>");
fprintf(fout, "<head>");
fprintf(fout, "<meta http-equiv = \"Content-Type\" content = \"text/html; charset=cp1251\" />");
fprintf(fout, "<title>HTML Document</title>");
fprintf(fout, "</head>");
fprintf(fout, "<body>");
char token[MAX_LEN_WORD];
// <20><><EFBFBD><EFBFBD> <20><> <20><><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD>
while (!feof(fin)) {
// <20><><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> - <20><><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD>
while (getNextDelim(fin, token)) {
// <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
if (strcmp(token, "<") == 0) {
fprintf(fout, "&lt");
} else if (strcmp(token, ">") == 0) {
fprintf(fout, "&gt");
}
else {
if (strcmp(token, "\n") == 0) {
fprintf(fout, "<br>");
}
fprintf(fout, "%s", token);
}
}
// <20><><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD> - <20><><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD>
if (getNextWord(fin, token, MAX_LEN_WORD)) {
// <20><><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD> <20> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20> <20><> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD>
if (Member(token)) {
fprintf(fout, "<b>%s</b>", token);
}
else {
fprintf(fout, "%s", token);
}
}
}
// <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20> HTML <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> HTML
fprintf(fout, "</body>");
fprintf(fout, "</html>");
// <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD>
fclose(fin);
fclose(fout);
return 1;
}
int isalpha_my(unsigned char ch);
// <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> 1 - <20><><EFBFBD><EFBFBD> <20><> <20><><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>.
// <20> <20><><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20> token <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD>, <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
// <20><><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>.
// <20><><EFBFBD><EFBFBD> <20> <20><><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD> <20><> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> - <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> 0.
// <20> <20><><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> token <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>.
int getNextDelim(FILE* fp, char token[])
{
int ch = getc(fp);
if (ch == EOF) {
return 0;
}
if (isalpha_my((unsigned char)ch)) {
ungetc(ch, fp);
return 0;
}
token[0] = (unsigned char)ch;
token[1] = '\0';
return 1;
}
// <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> 1 - <20><><EFBFBD><EFBFBD> <20><> <20><><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD>.
// <20> <20><><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20> token <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD>, <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
// <20><><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD>. <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD> <20><> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD> maxLen <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>.
// <20><><EFBFBD><EFBFBD> <20> <20><><EFBFBD><EFBFBD><EFBFBD> <20><> <20><><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD> - <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> 0.
// <20> <20><><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD> token <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD>.
int getNextWord(FILE* fp, char token[], int maxLen)
{
int i = 0;
int ch;
while (((ch = getc(fp)) != EOF) && (i < maxLen - 1)) {
if (!isalpha_my((unsigned char)(ch))) {
break;
}
token[i++] = ch;
}
ungetc(ch, fp);
token[i] = '\0';
if (i == 0)
return 0;
return 1;
}
// <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> 0 - <20><><EFBFBD><EFBFBD> ch - <20><> <20><><EFBFBD><EFBFBD><EFBFBD>.
// <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> 1 - <20><><EFBFBD><EFBFBD> ch - <20><><EFBFBD><EFBFBD><EFBFBD>.
// <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD> (<28> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD> < 128)
// <20> <20><><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD> <20><> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> ANSI
int isalpha_my(unsigned char ch) {
if (isalpha(ch))
return 1;
// ANSI <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>!!!
if (ch >= 192 && ch <= 223)
return 1;
if (ch >= 224 && ch <= 255)
return 1;
/*
if (ch >= '<EFBFBD>' && ch <= '<EFBFBD>') return 1;
if (ch >= '<EFBFBD>' && ch <= '<EFBFBD>') return 1;
if (ch >= '<EFBFBD>' && ch <= '<EFBFBD>')return 1;
if (ch == '<EFBFBD>' ) return 1;
if (ch == '<EFBFBD>') return 1;*/
return 0;
}