/* 2024-12-06 18:45:55 +04:00 */
#define _CRT_SECURE_NO_WARNINGS
#include <ctype.h>
#include <stdio.h>
#include <string.h>
#include <time.h>
#include <Windows.h>
#include "Dict.h"
/* Forward declarations of the helpers defined further down in this file. */
int getNextDelim(FILE *fp, char token[]);
int getNextWord(FILE *fp, char token[], int maxLen);
int LoadDictionary(char *filename);
int TextProcessing(char *filenameIn, char *filenameOut);

/*
 * Files used by the current benchmark pass. The initial values are only
 * defaults: main() and test_dicts() overwrite all three before every pass.
 */
char filenameDict[MAX_PATH] = "../Dictionaries/dict0.txt";
char filenameIn[MAX_PATH] = "../Texts/Alice.txt";
char filenameOut[MAX_PATH] = "out/Alice_out.html";

/*
 * Measured run times in seconds, filled by test(i, j):
 * one row per input text, one column per dictionary file.
 */
double results[3][15];
/*
 * Runs a single benchmark pass and records its duration.
 *
 * Loads the dictionary named by the global filenameDict, converts the text
 * file filenameIn into the HTML file filenameOut and stores the time spent
 * inside TextProcessing() (as reported by clock(), in seconds) in
 * results[i][j]. Loading and destroying the dictionary are outside the
 * timed interval.
 *
 * i, j - row and column of the results slot to fill. Indices outside the
 *        results array are rejected with a message and nothing is run.
 *
 * If the dictionary or one of the text files cannot be opened, the slot is
 * set to -1.0 so that a failed pass is not mistaken for a measurement.
 */
void test(int i, int j) {
    const int rowCount = (int)(sizeof results / sizeof results[0]);
    const int colCount = (int)(sizeof results[0] / sizeof results[0][0]);

    if (i < 0 || i >= rowCount || j < 0 || j >= colCount) {
        printf("test: no result slot [%d][%d]\n", i, j);
        return;
    }

    printf(" HTML = %s \n text = %s \n dict = %s \n ",
           filenameOut, filenameIn, filenameDict);

    /* LoadDictionary() returns 0 before it has called Create(), so in that
       case there is no dictionary to query and nothing to Destroy(). */
    if (!LoadDictionary(filenameDict)) {
        results[i][j] = -1.0;
        return;
    }

    const clock_t t0 = clock();
    const int converted = TextProcessing(filenameIn, filenameOut);
    const clock_t t1 = clock();
    Destroy();

    if (!converted) {
        results[i][j] = -1.0;
        return;
    }

    const double runtime = (t1 - t0) / (double)CLOCKS_PER_SEC;
    results[i][j] = runtime;
    printf(" t1 - t0 = %.3f sec (Run time of HTML generating) \n \n ", runtime);
}
void test_dicts ( int i ) {
2024-12-06 23:11:06 +04:00
for ( int j = 0 ; j < 5 ; j + + ) {
2024-12-06 18:45:55 +04:00
sprintf ( filenameDict , " ../Dictionaries/dict%d.txt " , j ) ;
test ( i , j ) ;
sprintf ( filenameDict , " ../Dictionaries/dict%da.txt " , j ) ;
2024-12-06 23:11:06 +04:00
test ( i , j + 5 ) ;
2024-12-06 18:45:55 +04:00
sprintf ( filenameDict , " ../Dictionaries/dict%db.txt " , j ) ;
2024-12-06 23:11:06 +04:00
test ( i , j + 10 ) ;
2024-12-06 18:45:55 +04:00
}
}
int main ( ) {
strcpy ( filenameIn , " ../Texts/Alice.txt " ) ;
strcpy ( filenameOut , " out/Alice_out.html " ) ;
test_dicts ( 0 ) ;
strcpy ( filenameIn , " ../Texts/Tolkien.txt " ) ;
strcpy ( filenameOut , " out/Tolkien_out.html " ) ;
test_dicts ( 1 ) ;
strcpy ( filenameIn , " ../Texts/Tolkien2.txt " ) ;
strcpy ( filenameOut , " out/Tolkien2_out.html " ) ;
test_dicts ( 2 ) ;
for ( int i = 0 ; i < 3 ; i + + ) {
2024-12-06 23:11:06 +04:00
for ( int j = 0 ; j < 15 ; j + + ) {
2024-12-06 18:45:55 +04:00
printf ( " %7.3lf " , results [ i ] [ j ] ) ;
}
printf ( " \n " ) ;
}
printf ( " \a " ) ;
return 0 ;
}
/*
 * Builds the dictionary from a word-list file.
 *
 * Opens filename as text, calls Create(), then Insert()s every word found in
 * the file. Words are the runs of letters returned by getNextWord();
 * everything between them is skipped.
 *
 * filename - path of the word list.
 *
 * Returns 0 if the file could not be opened; Create() has NOT been called in
 * that case. Returns 1 otherwise; the dictionary then exists and the caller
 * is responsible for Destroy(). After a read error the dictionary holds the
 * words read so far and a warning is printed.
 */
int LoadDictionary(char *filename) {
    FILE *fin = fopen(filename, "r");
    if (fin == NULL) {
        printf(" File %s didn't open! \n ", filename);
        return 0;
    }

    Create();

    char token[MAX_LEN_WORD];
    for (;;) {
        /* Separators carry no information in a word list: consume and drop them. */
        while (getNextDelim(fin, token)) {
        }

        /* getNextDelim() stops only at a letter or when nothing more can be
           read, so a failed word read here means the input is finished
           (end of file or read error). Looping on feof() instead would spin
           forever on a read error. Assumes MAX_LEN_WORD >= 2. */
        if (!getNextWord(fin, token, MAX_LEN_WORD)) {
            break;
        }
        Insert(token);
    }

    if (ferror(fin)) {
        printf("Read error in %s; the dictionary may be incomplete\n", filename);
    }

    fclose(fin);
    return 1;
}
/*
 * Writes one separator token to the HTML output.
 *
 * token is the one-character string produced by getNextDelim(). Characters
 * that have a meaning in HTML markup are written as character references,
 * and a line break gets a <br> in front of it so it survives rendering.
 */
static void writeDelimAsHtml(FILE *fout, const char *token) {
    switch (token[0]) {
    case '<':
        fputs("&lt;", fout);
        break;
    case '>':
        fputs("&gt;", fout);
        break;
    case '&':
        fputs("&amp;", fout);
        break;
    case '\n':
        /* keep the newline itself so the HTML source stays line-oriented */
        fputs("<br>\n", fout);
        break;
    default:
        fputs(token, fout);
        break;
    }
}

/*
 * Converts a text file into an HTML page in which every word that is in the
 * dictionary (Member() returns non-zero) is wrapped in <b>...</b>.
 *
 * The text is copied token by token: separators go through
 * writeDelimAsHtml(), words are written unchanged (they consist of letters
 * only, so they need no escaping). The page declares charset=cp1251; bytes
 * are copied as-is, so the input is presumably expected in that encoding.
 *
 * filenameIn  - path of the text to read.
 * filenameOut - path of the HTML file to create or overwrite.
 *
 * Returns 1 on success. Returns 0 if either file could not be opened or an
 * I/O error was detected while copying; a message is printed in both cases.
 * The dictionary must already be loaded.
 */
int TextProcessing(char *filenameIn, char *filenameOut) {
    FILE *fin = fopen(filenameIn, "r");
    if (fin == NULL) {
        printf(" File %s doesn't opened! \n ", filenameIn);
        return 0;
    }

    FILE *fout = fopen(filenameOut, "w");
    if (fout == NULL) {
        printf(" File %s doesn't opened! \n ", filenameOut);
        fclose(fin); /* do not leak the input stream on this path */
        return 0;
    }

    /* Page header. */
    fputs(" <!DOCTYPE html> ", fout);
    fputs(" <html> ", fout);
    fputs(" <head> ", fout);
    /* The http-equiv value must be exactly "Content-Type" (no padding inside
       the quotes), otherwise the charset declaration is not recognised. */
    fputs(" <meta http-equiv = \"Content-Type\" content = \"text/html; charset=cp1251\" /> ", fout);
    fputs(" <title>HTML Document</title> ", fout);
    fputs(" </head> ", fout);
    fputs(" <body> ", fout);

    char token[MAX_LEN_WORD];
    for (;;) {
        /* Copy the run of separators that precedes the next word. */
        while (getNextDelim(fin, token)) {
            writeDelimAsHtml(fout, token);
        }

        /* getNextDelim() stops only at a letter or when nothing more can be
           read, so a failed word read means the input is finished. Looping
           on feof() instead would never end after a read error. */
        if (!getNextWord(fin, token, MAX_LEN_WORD)) {
            break;
        }

        if (Member(token)) {
            fprintf(fout, "<b>%s</b>", token);
        } else {
            fputs(token, fout);
        }
    }

    /* Page footer. */
    fputs(" </body> ", fout);
    fputs(" </html> ", fout);

    int ok = !ferror(fin) && !ferror(fout);
    fclose(fin);
    /* fclose flushes buffered output, so a full disk may only show up here */
    if (fclose(fout) != 0) {
        ok = 0;
    }
    if (!ok) {
        printf("I/O error while converting %s to %s\n", filenameIn, filenameOut);
    }
    return ok;
}
int isalpha_my(unsigned char ch);

/*
 * Reads one separator character from fp.
 *
 * A separator is any character for which isalpha_my() returns 0.
 *
 * fp    - input stream.
 * token - receives the separator as a one-character, NUL-terminated string,
 *         so it must have room for at least 2 chars. It is left untouched
 *         when the function returns 0.
 *
 * Returns 1 if a separator was read and stored in token.
 * Returns 0 at end of input, or when the next character is a letter; the
 * letter is pushed back onto the stream so the next read sees it again.
 */
int getNextDelim(FILE *fp, char token[]) {
    const int ch = getc(fp);

    if (ch == EOF) {
        return 0;
    }
    if (isalpha_my((unsigned char)ch)) {
        ungetc(ch, fp);
        return 0;
    }

    token[0] = (char)ch;
    token[1] = '\0';
    return 1;
}
/*
 * Reads one word (a run of letters, as defined by isalpha_my()) from fp.
 *
 * fp     - input stream.
 * token  - receives the word as a NUL-terminated string.
 * maxLen - size of token in chars; at most maxLen - 1 letters are stored.
 *          A longer run of letters is split: the remainder stays in the
 *          stream and is returned by the following call(s).
 *
 * Returns 1 if at least one letter was read.
 * Returns 0 if the next character is not a letter (it is left in the
 * stream) or the input is exhausted; token is then the empty string.
 * Also returns 0, without touching token or the stream, when maxLen < 1.
 */
int getNextWord(FILE *fp, char token[], int maxLen) {
    if (maxLen < 1) {
        return 0; /* no room even for the terminator */
    }

    int len = 0;
    /* Check the remaining space before reading, so nothing has to be pushed
       back when the buffer fills up. */
    while (len < maxLen - 1) {
        const int ch = getc(fp);
        if (ch == EOF) {
            break;
        }
        if (!isalpha_my((unsigned char)ch)) {
            ungetc(ch, fp); /* leave the separator for the next read */
            break;
        }
        token[len++] = (char)ch;
    }

    token[len] = '\0';
    return len > 0;
}
/*
 * Tells whether ch counts as a letter for the tokenizer.
 *
 * Returns 1 if isalpha() accepts ch (the Latin letters in the default "C"
 * locale) or if ch is in the range 192..255; returns 0 otherwise.
 *
 * The 192..255 range is presumably the Cyrillic block of the single-byte
 * Windows-1251 ("ANSI") code page, which is also the charset declared for
 * the generated HTML.
 *
 * NOTE(review): in Windows-1251 the Cyrillic letter IO has codes 168 and
 * 184, outside this range, so it is treated as a separator. The disabled
 * code that used to live here seems to have special-cased two single
 * characters - confirm whether they should count as letters.
 */
int isalpha_my(unsigned char ch) {
    if (isalpha(ch)) {
        return 1;
    }
    /* ch is an unsigned char, so the upper bound 255 is implied */
    return ch >= 192;
}