227 lines
6.1 KiB
C
227 lines
6.1 KiB
C
#define _CRT_SECURE_NO_WARNINGS
#include <ctype.h>
#include <stdio.h>
#include <string.h>
#include <time.h>

#include "Dict.h"
|
||
|
||
// Tokenizer helpers, defined later in this file.
int getNextDelim(FILE* fp, char token[]);
int getNextWord(FILE* fp, char token[], int maxLen);

// Processing stages called from main().
int LoadDictionary(char* filename);
int TextProcessing(char* filenameIn, char* filenameOut);

// File whose words are loaded into the dictionary; those words are
// highlighted in the generated HTML.
char filenameDict[] = "c:\\Temp\\FIST2024\\TextMarkup\\dict2.txt";

// Alternative input/output pairs, kept disabled for switching between texts:
//char filenameIn[] = "c:\\Temp\\FIST2024\\TextMarkup\\text1_out.html";
//char filenameOut[] = "c:\\Temp\\FIST2024\\TextMarkup\\text1_out_out.html";
//char filenameIn[] = "c:\\Temp\\FIST2024\\TextMarkup\\text1.txt";
//char filenameOut[] = "c:\\Temp\\FIST2024\\TextMarkup\\text1_out.html";
//char filenameIn[] = "c:\\Temp\\FIST2024\\TextMarkup\\Alice.txt";
//char filenameOut[] = "c:\\Temp\\FIST2024\\TextMarkup\\Alice_out.html";

// Active pair: the text that is read and the HTML file that is written.
char filenameIn[] = "c:\\Temp\\FIST2024\\TextMarkup\\Tolkien.txt";
char filenameOut[] = "c:\\Temp\\FIST2024\\TextMarkup\\Tolkien_out.html";
|
||
|
||
|
||
void main() {
|
||
// ñîîáùàåì êàêèå ôàéëû îáðàáàòûâàþòñÿ
|
||
printf("HTML file %s\nis created from text file %s\nwith highlighting words from %s\ndictionary\n\n\n",
|
||
filenameOut, filenameIn, filenameDict);
|
||
|
||
// t0 - ñêîëüêî ïðîøëî âðåìåíè îò ñòàðòà ïðîãðàììû äî ìîìåíòà âõîäà â ôóíêöèþ main()
|
||
long t0 = clock();
|
||
printf("t0 = %.3f sec \n", t0 / (float)CLOCKS_PER_SEC);
|
||
|
||
LoadDictionary(filenameDict);
|
||
|
||
// t1 - ñêîëüêî ïðîøëî âðåìåíè îò ñòàðòà ïðîãðàììû äî îêîí÷àíèÿ çàãðóçêè ñëîâàðÿ
|
||
int t1 = clock();
|
||
printf("t1 = %.3f sec \n", t1 / (float)CLOCKS_PER_SEC);
|
||
|
||
TextProcessing(filenameIn, filenameOut);
|
||
|
||
// t2 - ñêîëüêî ïðîøëî âðåìåíè îò ñòàðòà ïðîãðàììû äî îêîí÷àíèÿ êîíâåðòàöèè òåêñòà
|
||
long t2 = clock();
|
||
printf("t2 = %.3f sec \n", t2 / (float)CLOCKS_PER_SEC);
|
||
|
||
Destroy();
|
||
|
||
// t3 - ñêîëüêî ïðîøëî âðåìåíè îò îêîí÷àíèÿ êîíâåðòàöèè òåêñòà äî îêîí÷àíèÿ óíè÷òîæåíèÿ ñëîâàðÿ
|
||
long t3 = clock();
|
||
|
||
printf("t3 = %.3f sec \n", t3 / (float)CLOCKS_PER_SEC);
|
||
printf("t1 - t0 = %.3f sec (Run time of dictionary loading)\n", (t1 - t0) / (float)CLOCKS_PER_SEC);
|
||
printf("t2 - t1 = %.3f sec (Run time of HTML generating)\n", (t2 - t1) / (float)CLOCKS_PER_SEC);
|
||
printf("t3 - t2 = %.3f sec (Run time of dictionary destroying )\n", (t3 - t1) / (float)CLOCKS_PER_SEC);
|
||
}
|
||
|
||
|
||
int LoadDictionary(char* filename) {
|
||
// îòêðûòü ôàéë
|
||
FILE* fin = fopen(filename, "rt");
|
||
if (fin == NULL) {
|
||
// åñëè ôàéë íå ñìîãëè îòêðûòü - ñîîáùàåì îá ýòîì
|
||
printf("File %s doesn't opened!\n", filename);
|
||
return 0;
|
||
}
|
||
|
||
Create();
|
||
char token[MAX_LEN_WORD];
|
||
|
||
// ïîêà íå êîíåö ôàéëà
|
||
while (!feof(fin)) {
|
||
// ïîêà åñòü ðàçäåëèòåëü - áåðåì åãî
|
||
while (getNextDelim(fin, token)) {
|
||
}
|
||
// åñëè åñòü ñëîâî - áåðåì åãî
|
||
if (getNextWord(fin, token, MAX_LEN_WORD)) {
|
||
Insert(token);
|
||
}
|
||
}
|
||
// Çàêðûâàåì ôàéë ñ òåêñòîì
|
||
fclose(fin);
|
||
return 1;
|
||
}
|
||
|
||
|
||
|
||
int TextProcessing(char* filenameIn, char* filenameOut) {
|
||
// îòêðûòü ôàéë âõîäíîé
|
||
FILE* fin = fopen(filenameIn, "rt");
|
||
if (fin == NULL) {
|
||
// åñëè ôàéë íå ñìîãëè îòêðûòü - ñîîáùàåì îá ýòîì
|
||
printf("File %s doesn't opened!\n", filenameIn);
|
||
return 0;
|
||
}
|
||
|
||
// îòêðûòü ôàéë âûõîäíîé
|
||
FILE* fout = fopen(filenameOut, "wt");
|
||
if (fout == NULL) {
|
||
// åñëè ôàéë íå ñìîãëè îòêðûòü - ñîîáùàåì îá ýòîì
|
||
printf("File %s doesn't opened!\n", filenameOut);
|
||
// è çàêðûâàåì âõîäíîé ôàéë
|
||
fclose(fin);
|
||
return 0;
|
||
}
|
||
|
||
// Âûâîäèì â âûõîäíîé ôàéë çàãîëîâîê HTML äîêóìåíòà
|
||
fprintf(fout, "<!DOCTYPE html>");
|
||
fprintf(fout, "<html>");
|
||
fprintf(fout, "<head>");
|
||
fprintf(fout, "<meta http - equiv = \"Content-Type\" content = \"text/html; charset=utf-8\" />");
|
||
fprintf(fout, "<title>HTML Document</title>");
|
||
fprintf(fout, "</head>");
|
||
fprintf(fout, "<body>");
|
||
char token[MAX_LEN_WORD];
|
||
|
||
// ïîêà íå êîíåö ôàéëà
|
||
while (!feof(fin)) {
|
||
// ïîêà åñòü ðàçäåëèòåëü - áåðåì åãî
|
||
while (getNextDelim(fin, token)) {
|
||
// âûâîäèì ðàçäåëèòåëü
|
||
if (strcmp(token, "<") == 0) {
|
||
fprintf(fout, "<");
|
||
}
|
||
else if (strcmp(token, ">") == 0) {
|
||
fprintf(fout, ">");
|
||
}
|
||
else {
|
||
if (strcmp(token, "\n") == 0) {
|
||
fprintf(fout, "<br>");
|
||
}
|
||
fprintf(fout, "%s", token);
|
||
}
|
||
}
|
||
// åñëè åñòü ñëîâî - áåðåì åãî
|
||
if (getNextWord(fin, token, MAX_LEN_WORD)) {
|
||
// Åñëè ñëîâî åñòü â Ñëîâàðå – òî âûäåëÿåì åãî
|
||
if (Member(token)) {
|
||
fprintf(fout, "<b>%s</b>", token);
|
||
}
|
||
else {
|
||
fprintf(fout, "%s", token);
|
||
}
|
||
}
|
||
}
|
||
|
||
// âûâîäèò â HTML çàâåðøàþùèå òåãè äîêóìåíòà HTML
|
||
fprintf(fout, "</body>");
|
||
fprintf(fout, "</html>");
|
||
// çàêðûâàåì âõîäíîé è âûõîäíîé ôàéëà
|
||
fclose(fin);
|
||
fclose(fout);
|
||
|
||
return 1;
|
||
}
|
||
|
||
int isalpha_my(unsigned char ch);

// Tries to read one delimiter (any character that is not a letter) from fp.
// Returns 1 if a delimiter was read; token then holds it as a
// one-character, NUL-terminated string.
// Returns 0 if getc() reported EOF or if the next character is a letter;
// a letter is pushed back onto the stream and token is left unmodified.
int getNextDelim(FILE* fp, char token[])
{
    const int ch = getc(fp);

    if (ch != EOF && !isalpha_my((unsigned char)ch)) {
        token[0] = (unsigned char)ch;
        token[1] = '\0';
        return 1;
    }

    // A letter belongs to the next word: return it to the stream for getNextWord().
    if (ch != EOF) {
        ungetc(ch, fp);
    }
    return 0;
}
|
||
|
||
|
||
// Tries to read one word (a run of letters) from fp.
// Returns 1 if at least one letter was read; token then holds the word as a
// NUL-terminated string of at most maxLen - 1 characters. A longer run of
// letters is cut there and the remainder stays in the stream.
// Returns 0 if the stream did not start with a letter; token is then the
// empty string.
int getNextWord(FILE* fp, char token[], int maxLen)
{
    int len = 0;
    int ch;

    for (;;) {
        ch = getc(fp);
        if (ch == EOF) {
            break;
        }
        if (len >= maxLen - 1) {
            break;
        }
        if (!isalpha_my((unsigned char)ch)) {
            break;
        }
        token[len] = ch;
        ++len;
    }

    // Return the character that ended the word to the stream
    // (ungetc() does nothing when ch is EOF).
    ungetc(ch, fp);
    token[len] = '\0';

    return (len != 0) ? 1 : 0;
}
|
||
|
||
|
||
// Reports whether ch is a letter.
// Returns 1 for every character accepted by isalpha() and for the Russian
// letters of the ANSI (Windows-1251) encoding; returns 0 otherwise.
int isalpha_my(unsigned char ch) {
    if (isalpha(ch))
        return 1;

    // ANSI encoding: codes 192..255 are the Russian capital and small letters.
    if (ch >= 192)
        return 1;

    // The letters "yo" lie outside that range: capital is 168, small is 184.
    if (ch == 168 || ch == 184)
        return 1;

    return 0;
}
|