/*
 * 12.010 Homework #3 Question 2
 *
 * Program to read a paragraph and compute the mean and RMS about the mean
 * of the number of characters per word and the number of words per sentence.
 *
 * Usage: program [filename]
 *   filename  text file to analyse; defaults to "Q2_text.txt" when omitted.
 *
 * Counting rules:
 *   - Only the letters a-z and A-Z count as characters of a word.
 *   - A space or a newline ends the current word (if one is in progress).
 *   - '.' or '?' ends the current word and the current sentence.
 *   - A '-' does not end a word; a '-' immediately before a newline means the
 *     word continues on the next line.
 *   - Every other character (digits, commas, colons, ...) is ignored.
 */

#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Running totals gathered while scanning the text. */
struct text_stats {
    int num_char;              /* Letters seen so far in the current word */
    int num_word;              /* Number of words in the whole file */
    int num_sent;              /* Number of sentences in the file */
    int num_word_insent;       /* Number of words in the current sentence */
    long long sum_num_char;    /* Sum of characters per word */
    long long sum_num_char_sq; /* Sum of (characters per word) squared */
    long long sum_num_word;    /* Sum of words per sentence */
    long long sum_num_word_sq; /* Sum of (words per sentence) squared */
};

/* Close the word in progress, if any, and fold its length into the totals. */
static void end_word(struct text_stats *s)
{
    if (s->num_char == 0) {
        /* Repeated separators or leading blanks: there is no word to count. */
        return;
    }
    s->num_word++;
    s->num_word_insent++;
    s->sum_num_char += s->num_char;
    s->sum_num_char_sq += (long long)s->num_char * s->num_char;
    s->num_char = 0;
}

/* Close the word in progress and the current sentence.  Every terminator
   counts as a sentence, even when no word precedes it. */
static void end_sentence(struct text_stats *s)
{
    end_word(s);
    s->num_sent++;
    s->sum_num_word += s->num_word_insent;
    s->sum_num_word_sq += (long long)s->num_word_insent * s->num_word_insent;
    s->num_word_insent = 0;
}

/* Compute the mean and the sample RMS about the mean (n-1 denominator) from
   a sum, a sum of squares and a count.  Both results are 0 when there is no
   data; the RMS is 0 when there is only one sample. */
static void mean_and_rms(long long sum, long long sum_sq, int count,
                         double *mean, double *rms)
{
    *mean = 0.0;
    *rms = 0.0;
    if (count < 1) {
        return; /* Avoid dividing by zero on an empty file. */
    }
    *mean = (double)sum / count;
    if (count > 1) {
        double rmssq = ((double)sum_sq - count * (*mean) * (*mean)) / (count - 1);
        /* Rounding can push a tiny variance just below zero. */
        *rms = (rmssq > 0.0) ? sqrt(rmssq) : 0.0;
    }
}

/*
 * Entry point.
 *   argc  number of command-line arguments
 *   argv  argument strings; argv[1], when present, is the file to read
 * Returns 0 on success and -1 when the file cannot be opened or read.
 */
int main(int argc, char *argv[])
{
    /* Point at the name instead of copying it, so a long argument cannot
       overflow a fixed-size buffer. */
    const char *filename = (argc > 1) ? argv[1] : "Q2_text.txt";
    struct text_stats stats = {0};
    int hyphen_pending = 0; /* Non-zero while the last significant character
                               read was '-' */
    int read_failed;
    int inc;                /* Input character read from file */
    double mean_char, rms_char; /* Mean and RMS number of characters per word */
    double mean_sent, rms_sent; /* Mean and RMS of words per sentence */
    FILE *fp;

    printf("\n12.010 HW 3 Q2:\nReading file %s\n", filename);

    /* Now try to open the file */
    fp = fopen(filename, "r");
    if (fp == NULL) {
        printf("Error opening file %s\n", filename);
        return -1;
    }

    /* Read one character at a time and update the statistics as we go. */
    while ((inc = fgetc(fp)) != EOF) {
        if ((inc >= 'a' && inc <= 'z') || (inc >= 'A' && inc <= 'Z')) {
            /* Regular letter */
            stats.num_char++;
            hyphen_pending = 0;
        } else if (inc == '-') {
            /* Hyphenated word, so keep the character count going */
            hyphen_pending = 1;
        } else if (inc == '\n') {
            /* End of line normally ends a word, unless the line ended with a
               hyphen, in which case the word continues on the next line. */
            if (!hyphen_pending) {
                end_word(&stats);
            }
            hyphen_pending = 0;
        } else if (inc == ' ') {
            end_word(&stats);
            hyphen_pending = 0;
        } else if (inc == '.' || inc == '?') {
            end_sentence(&stats);
            hyphen_pending = 0;
        }
        /* Any other character (:, ;, numbers, '\r', ...) is ignored and does
           not disturb a pending end-of-line hyphen. */
    }

    /* A final word that is not followed by a separator still counts. */
    end_word(&stats);

    read_failed = ferror(fp);
    fclose(fp);
    if (read_failed) {
        printf("Error reading file %s\n", filename);
        return -1;
    }

    /* Now finish up the calculations */
    mean_and_rms(stats.sum_num_char, stats.sum_num_char_sq, stats.num_word,
                 &mean_char, &rms_char);
    mean_and_rms(stats.sum_num_word, stats.sum_num_word_sq, stats.num_sent,
                 &mean_sent, &rms_sent);

    /* Output the results */
    printf("\n12.010 HW03_02: In file %s there are:\n", filename);
    printf("Mean characters per word %5.2f with RMS %5.2f in %4d words;\n",
           mean_char, rms_char, stats.num_word);
    printf("Mean words per sentence %5.2f with RMS %5.2f in %4d sentences.\n",
           mean_sent, rms_sent, stats.num_sent);

    return EXIT_SUCCESS;
}