Vocabulary Task (C language) Solution given below. How to fix the error in the picture attached. txt file is not being created from this code please also include how to create a txt file and where will it be saved in the computer Natural language processing (NLP) is a field of artificial intelligence that seeks to develop the ability of a computer program to understand human language. Usually, the first step of an NLP system is to convert words into numeric codes. Thus, the system converts an input text into a sequence of numeric codes before any high-level analysis. This process is known as text preprocessing. We can only perform text preprocessing if we have a vocabulary of words and their associated numeric codes. Your task is to create a vocabulary of unique words for a given text file and assign a different number from 1 to N to each unique word, with N being the total number of unique words. You must perform this assignment so that the first word in alphabetical order gets the number 1, the second word in alphabetical order gets the number 2, and so on. A word is a sequence of letters (uppercase or lowercase). The file is composed of letters and white spaces (spaces, tabs, newlines). White spaces serve as word separators and cannot be part of any word. A file can have multiple consecutive separators. Different case variations of the same word (The, the, and THE) must be considered the same. All vocabulary words must contain uppercase letters only. Your program will receive two command-line arguments, the name of the input text file and the name of the file where the vocabulary must be saved. Example: $ ./a.out inputX.txt vocabularyX.txt Each line of the output file must contain a number (the numeric code) and a word (a unique word) separated by a space, and the words must be in alphabetical order. Below are some examples of input and expected output. 
Examples (your program must follow this format precisely) Example #1 input0.txt the THE The ha Ha HA vocabulary0.txt 1 HA 2 THE Example #2 input1.txt Lorem ipsum dolor sit amet consectetur adipiscing elit Ut commodo nec magna et sodales vocabulary1.txt 1 ADIPISCING 2 AMET 3 COMMODO 4 CONSECTETUR 5 DOLOR 6 ELIT 7 ET 8 IPSUM 9 LOREM 10 MAGNA 11 NEC 12 SIT 13 SODALES 14 UT Solution: #include #include #include #include #define MAX_WORDS 1000 #define MAX_WORD_LENGTH 100 /* Function prototypes */ void to_uppercase(char *); int compare_words(const void *, const void *); int main(int argc, char *argv[]) { /* Check command line arguments */ if (argc != 3) { fprintf(stderr, "Usage: %s \n", argv[0]); return 1; } /* Open input file */ FILE *input_file = fopen(argv[1], "r"); if (input_file == NULL) { fprintf(stderr, "Error opening input file %s\n", argv[1]); return 1; } /* Open output file */ FILE *output_file = fopen(argv[2], "w"); if (output_file == NULL) { fprintf(stderr, "Error opening output file %s\n", argv[2]); return 1; } /* Read words from input file */ char words[MAX_WORDS][MAX_WORD_LENGTH]; int num_words = 0; char word[MAX_WORD_LENGTH]; while (fscanf(input_file, "%s", word) == 1) { /* Convert word to uppercase */ to_uppercase(word); /* Check if word already exists in array */ int i; for (i = 0; i < num_words; i++) { if (strcmp(word, words[i]) == 0) { break; } } /* Add word to array if it doesn't exist */ if (i == num_words) { strcpy(words[num_words], word); num_words++; } } /* Sort words alphabetically */ qsort(words, num_words, MAX_WORD_LENGTH, compare_words); /* Write words to output file */ int i; for (i = 0; i < num_words; i++) { fprintf(output_file, "%d %s\n", i+1, words[i]); } /* Close files */ fclose(input_file); fclose(output_file); return 0; } /* Convert a string to uppercase */ void to_uppercase(char *str) { while (*str) { *str = toupper(*str); str++; } } /* Compare two words for sorting alphabetically */ int compare_words(const void *a, const void *b) { 
const char *word_a = (const char *)a; const char *word_b = (const char *)b; return strcmp(word_a, word_b); }
Vocabulary
Task (C language)
The solution is given below. How can I fix the error shown in the attached picture? The .txt file is not being created by this code.
Please also explain how to create a .txt file and where it will be saved on the computer.
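Natural language processing (NLP) is a field of artificial intelligence that seeks to develop the ability of a computer program to understand human language. Usually, the first step of an NLP system is to convert words into numeric codes. Thus, the system converts an input text into a sequence of numeric codes before any high-level analysis. This process is known as text preprocessing.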
We can only perform text preprocessing if we have a vocabulary of words and their associated numeric codes. Your task is to create a vocabulary of unique words for a given text file and assign a different number from 1 to N to each unique word, with N being the total number of unique words. You must perform this assignment so that the first word in alphabetical order gets the number 1, the second word in alphabetical order gets the number 2, and so on.
A word is a sequence of letters (uppercase or lowercase). The file is composed of letters and white spaces (spaces, tabs, newlines). White spaces serve as word separators and cannot be part of any word. A file can have multiple consecutive separators. Different case variations of the same word (The, the, and THE) must be considered the same. All vocabulary words must contain uppercase letters only.
Your program will receive two command-line arguments, the name of the input text file and the name of the file where the vocabulary must be saved. Example:
$ ./a.out inputX.txt vocabularyX.txt
Each line of the output file must contain a number (the numeric code) and a word (a unique word) separated by a space, and the words must be in alphabetical order. Below are some examples of input and expected output.
Examples (your program must follow this format precisely)
Example #1
input0.txt
the THE The ha Ha HA
vocabulary0.txt
1 HA
2 THE
Example #2
input1.txt
Lorem ipsum dolor sit amet consectetur adipiscing elit
Ut commodo nec magna et sodales
vocabulary1.txt
1 ADIPISCING
2 AMET
3 COMMODO
4 CONSECTETUR
5 DOLOR
6 ELIT
7 ET
8 IPSUM
9 LOREM
10 MAGNA
11 NEC
12 SIT
13 SODALES
14 UT
Solution:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#define MAX_WORDS 1000
#define MAX_WORD_LENGTH 100
/* Function prototypes (definitions follow main) */
/* Upper-cases the given NUL-terminated string in place. */
void to_uppercase(char *);
/* qsort comparator: orders two NUL-terminated words using strcmp. */
int compare_words(const void *, const void *);
/*
 * Read the next whitespace-delimited word from `in` into `buf`.
 *
 * `cap` is the capacity of `buf` including the terminating NUL and must be
 * at least 2. A word longer than cap - 1 characters is split: the excess
 * characters stay in the stream and are returned by the next call, so the
 * buffer can never overflow.
 *
 * Returns 1 if a word was stored, 0 when no more words are available.
 */
static int read_word(FILE *in, char *buf, size_t cap) {
    int c;
    size_t len = 0;

    /* Skip any run of leading separators (spaces, tabs, newlines). */
    do {
        c = fgetc(in);
    } while (c != EOF && isspace(c));

    while (c != EOF && !isspace(c)) {
        if (len + 1 >= cap) {
            /* Buffer full: push the character back for the next call. */
            ungetc(c, in);
            break;
        }
        buf[len++] = (char)c;
        c = fgetc(in);
    }
    buf[len] = '\0';
    return len > 0;
}

/*
 * Build a vocabulary from a text file.
 *
 * Usage: <program> <input_file> <output_file>
 *
 * Every unique word of the input file (case-insensitive) is written to the
 * output file in upper case, one per line, as "<code> <WORD>", where codes
 * run from 1 to N in alphabetical order. Relative file names are resolved
 * against the current working directory of the process.
 *
 * Returns 0 on success and 1 on any error (bad arguments, a file that cannot
 * be opened, an I/O failure, or more than MAX_WORDS unique words).
 */
int main(int argc, char *argv[]) {
    /* Check command line arguments */
    if (argc != 3) {
        fprintf(stderr, "Usage: %s <input_file> <output_file>\n", argv[0]);
        return 1;
    }

    /* Open input file */
    FILE *input_file = fopen(argv[1], "r");
    if (input_file == NULL) {
        fprintf(stderr, "Error opening input file %s\n", argv[1]);
        return 1;
    }

    /* Open output file */
    FILE *output_file = fopen(argv[2], "w");
    if (output_file == NULL) {
        fprintf(stderr, "Error opening output file %s\n", argv[2]);
        fclose(input_file); /* do not leak the already-open input stream */
        return 1;
    }

    char words[MAX_WORDS][MAX_WORD_LENGTH];
    int num_words = 0;
    char word[MAX_WORD_LENGTH];
    int status = 0;

    /* Read words from input file; read_word never writes past `word`. */
    while (read_word(input_file, word, sizeof word)) {
        /* Case variations of a word must map to the same entry. */
        to_uppercase(word);

        /* Check if word already exists in array */
        int i;
        for (i = 0; i < num_words; i++) {
            if (strcmp(word, words[i]) == 0) {
                break;
            }
        }
        if (i < num_words) {
            continue;
        }

        /* Refuse to write past the end of the vocabulary table. */
        if (num_words == MAX_WORDS) {
            fprintf(stderr, "Too many unique words (limit is %d)\n", MAX_WORDS);
            status = 1;
            break;
        }
        strcpy(words[num_words], word);
        num_words++;
    }

    if (status == 0 && ferror(input_file)) {
        fprintf(stderr, "Error reading input file %s\n", argv[1]);
        status = 1;
    }

    if (status == 0) {
        /* Sort words alphabetically; each row of `words` is one element. */
        qsort(words, (size_t)num_words, sizeof words[0], compare_words);

        /* Write words to output file */
        for (int i = 0; i < num_words; i++) {
            if (fprintf(output_file, "%d %s\n", i + 1, words[i]) < 0) {
                fprintf(stderr, "Error writing output file %s\n", argv[2]);
                status = 1;
                break;
            }
        }
    }

    /* Close files; fclose flushes, so a failure here means lost output. */
    fclose(input_file);
    if (fclose(output_file) != 0) {
        fprintf(stderr, "Error closing output file %s\n", argv[2]);
        status = 1;
    }
    return status;
}
/*
 * Convert a NUL-terminated string to uppercase in place.
 *
 * str: string to modify; must not be NULL. An empty string is left unchanged.
 */
void to_uppercase(char *str) {
    while (*str) {
        /* toupper requires a value representable as unsigned char (or EOF);
         * passing a negative plain char is undefined behaviour. */
        *str = (char)toupper((unsigned char)*str);
        str++;
    }
}
/*
 * qsort comparator for NUL-terminated words.
 *
 * a, b: pointers to the first character of each word.
 * Returns the strcmp result: negative, zero, or positive when the first
 * word sorts before, equal to, or after the second.
 */
int compare_words(const void *a, const void *b) {
    return strcmp((const char *)a, (const char *)b);
}
Trending now
This is a popular solution!
Step by step
Solved in 2 steps