Caesar cipher cracker

How can I refactor this with less code? This is homework and is cracking a Caesar cipher-text using frequency distribution.

I have completed the assignment but would like it to be cleaner.

#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <ctype.h>
#define TEXT_SIZE 100000  // Note, the longer the text the more likely you will get a good decode from the start.
#define ALEN 26         // Number of chars in ENGLISH alphabet
#define CHFREQ "ETAONRISHDLFCMUGYPWBVKJXQZ" // Characters in order of appearance in English documents.
#define ALPHABET "ABCDEFGHIJKLMNOPQRSTUVWXYZ"


/* Decode the given text using the given map and store the result in newtext */
void decode_text(char* text, char* newtext, char map[][2]) {
int i, j;

for (i = 0; i < strlen(text); i++) {
    // If the current character in text is not a letter, copy it to newtext as punctuation and digits should be maintained.
    if (!isalpha(text[i])) {
        newtext[i] = text[i];
        continue;
    }
    for (j = 0; j < ALEN; j++) {
        if (text[i] == map[j][1]) {
            newtext[i] = map [j][0];
        }
    }
}
}

char upcase(char ch){
    if(islower(ch))
    ch -= 'a' - 'A';
  return ch;
}

int main(int argc, char **argv){

// first allocate some space for our input text (we will read from stdin).

char* text = (char*)malloc(sizeof(char)*TEXT_SIZE+1);
char textfreq[ALEN][2];
char map[ALEN][2];
char newtext[TEXT_SIZE];
char ch, opt, tmpc, tmpc2;
int i, j, tmpi;

// Check the CLI arguments and extract the mode: interactive or dump and store in opt.

if(!(argc == 2 && isalpha(opt = argv[1][1]) && (opt == 'i' || opt == 'd'))){
    printf("format is: '%s' [-d|-i]\n", argv[0]);
    exit(1);
}

// Now read TEXT_SIZE or feof worth of characters (whichever is smaller) and convert to uppercase as we do it.

for(i = 0, ch = fgetc(stdin); i < TEXT_SIZE && !feof(stdin); i++, ch = fgetc(stdin)){
    text[i] = (isalpha(ch)?upcase(ch):ch);
}
text[i] = '\0'; // terminate the string properly.

// Assign alphabet to one dimension of text frequency array and a counter to the other dimension
for (i = 0; i < ALEN; i++) {
    textfreq[i][0] = ALPHABET[i];
    textfreq[i][1] = 0;
}

// Count frequency of characters in the given text
for (i = 0; i < strlen(text); i++) {
    for (j = 0; j < ALEN; j++) {
        if (text[i] == textfreq[j][0]) textfreq[j][1]+=1;
    }
}

//Sort the character frequency array in descending order
for (i = 0; i < ALEN-1; i++) {
    for (j= 0; j < ALEN-i-1; j++) {
        if (textfreq[j][1] < textfreq[j+1][1]) {
            tmpi = textfreq[j][1];
            tmpc = textfreq[j][0];
            textfreq[j][1] = textfreq[j+1][1];
            textfreq[j][0] = textfreq[j+1][0];
            textfreq[j+1][1] = tmpi;
            textfreq[j+1][0] = tmpc;
        }
    }
}

//Map characters to most occurring English characters
for (i = 0; i < ALEN; i++) {
    map[i][0] = CHFREQ[i];
    map[i][1] = textfreq[i][0];
}

// Sort the map lexicographically
for (i = 0; i < ALEN-1; i++) {
    for (j= 0; j < ALEN-i-1; j++) {
        if (map[j][0] > map[j+1][0]) {
            tmpc = map[j][0];
            tmpc2 = map[j][1];
            map[j][0] = map[j+1][0];
            map[j][1] = map[j+1][1];
            map[j+1][0] = tmpc;
            map[j+1][1] = tmpc2;
        }
    }
}

if(opt == 'd'){
    decode_text(text, newtext, map);
} else {
// do option -i
}

// Print alphabet and map to stderr and the decoded text to stdout
fprintf(stderr, "\n%s\n", ALPHABET);
for (i = 0; i < ALEN; i++) {
    fprintf(stderr, "%c", map[i][1]);
}
printf("\n%s\n", newtext);

return 0;
}

Answer

A few simple comments :

Listen to your compiler

The compiler can give you a lot of really interesting information. Activate all warnings (I use gcc -Wall -Wextra -std=c99 cipher.c -o cipher_out) and you’ll see a few things to fix :

cipher.c: In function ‘decode_text’:
cipher.c:13:16: warning: comparison between signed and unsigned integer expressions [-Wsign-compare]
  for (i = 0; i < strlen(text); i++) {
                ^
cipher.c: In function ‘main’:
cipher.c:37:2: warning: implicit declaration of function ‘malloc’ [-Wimplicit-function-declaration]
  char* text = (char*)malloc(sizeof(char)*TEXT_SIZE+1);
  ^
cipher.c:37:22: warning: incompatible implicit declaration of built-in function ‘malloc’ [enabled by default]
  char* text = (char*)malloc(sizeof(char)*TEXT_SIZE+1);
                      ^
cipher.c:47:3: warning: implicit declaration of function ‘printf’ [-Wimplicit-function-declaration]
   printf("format is: '%s' [-d|-i]\n", argv[0]);
   ^
cipher.c:47:3: warning: incompatible implicit declaration of built-in function ‘printf’ [enabled by default]
cipher.c:48:3: warning: implicit declaration of function ‘exit’ [-Wimplicit-function-declaration]
   exit(1);
   ^
cipher.c:48:3: warning: incompatible implicit declaration of built-in function ‘exit’ [enabled by default]
cipher.c:53:2: warning: implicit declaration of function ‘fgetc’ [-Wimplicit-function-declaration]
  for(i = 0, ch = fgetc(stdin); i < TEXT_SIZE && !feof(stdin); i++, ch = fgetc(stdin)){
  ^
cipher.c:53:24: error: ‘stdin’ undeclared (first use in this function)
  for(i = 0, ch = fgetc(stdin); i < TEXT_SIZE && !feof(stdin); i++, ch = fgetc(stdin)){
                        ^
cipher.c:53:24: note: each undeclared identifier is reported only once for each function it appears in
cipher.c:53:2: warning: implicit declaration of function ‘feof’ [-Wimplicit-function-declaration]
  for(i = 0, ch = fgetc(stdin); i < TEXT_SIZE && !feof(stdin); i++, ch = fgetc(stdin)){
  ^
cipher.c:65:16: warning: comparison between signed and unsigned integer expressions [-Wsign-compare]
  for (i = 0; i < strlen(text); i++) {
                ^
cipher.c:112:2: warning: implicit declaration of function ‘fprintf’ [-Wimplicit-function-declaration]
  fprintf(stderr, "\n%s\n", ALPHABET);
  ^
cipher.c:112:2: warning: incompatible implicit declaration of built-in function ‘fprintf’ [enabled by default]
cipher.c:112:10: error: ‘stderr’ undeclared (first use in this function)
  fprintf(stderr, "\n%s\n", ALPHABET);
          ^
cipher.c:116:2: warning: incompatible implicit declaration of built-in function ‘printf’ [enabled by default]
  printf("\n%s\n", newtext);
  ^

You are missing :

#include <stdio.h>
#include <stdlib.h

And you should define i and j as unsigned int.

Format your code

The indentation seems a bit off in your question. I don’t know if it’s because you pasted the code in a wrong way or whatever but your favorite text editor can fix this easily.

Tell what you want to do

You are not describing anywhere how your program works : format is: program_name [-d|-i] is not enough if you don’t explain what the options are for.

Also, it should be described somewhere that your program will affect only letters for instance.

Move variables to the smallest possible scope

It keeps things easier to understand and to split into smaller functions.

At this stage, the code looks like :

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#define TEXT_SIZE 100000  // Note, the longer the text the more likely you will get a good decode from the start.
#define ALEN 26         // Number of chars in ENGLISH alphabet
#define CHFREQ "ETAONRISHDLFCMUGYPWBVKJXQZ" // Characters in order of appearance in English documents.
#define ALPHABET "ABCDEFGHIJKLMNOPQRSTUVWXYZ"


/* Decode the given text using the given map and store the result in newtext */
void decode_text(char* text, char* newtext, char map[][2]) {
    for (unsigned int i = 0; i < strlen(text); i++) {
        // If the current character in text is not a letter, copy it to newtext as punctuation and digits should be maintained.
        if (!isalpha(text[i])) {
            newtext[i] = text[i];
            continue;
        }
        for (unsigned int j = 0; j < ALEN; j++) {
            if (text[i] == map[j][1]) {
                newtext[i] = map [j][0];
            }
        }
    }
}

char upcase(char ch){
    if(islower(ch))
        ch -= 'a' - 'A';
    return ch;
}

int main(int argc, char **argv){
    // first allocate some space for our input text (we will read from stdin).
    char* text = (char*)malloc(sizeof(char)*TEXT_SIZE+1);
    char opt;
    int tmpi;

    // Check the CLI arguments and extract the mode: interactive or dump and store in opt.

    if(!(argc == 2 && isalpha(opt = argv[1][1]) && (opt == 'i' || opt == 'd'))){
        printf("format is: '%s' [-d|-i]\n", argv[0]);
        exit(1);
    }

    // Now read TEXT_SIZE or feof worth of characters (whichever is smaller) and convert to uppercase as we do it.
    {
        unsigned int i;
        for(char ch = fgetc(stdin); i < TEXT_SIZE && !feof(stdin); i++, ch = fgetc(stdin)){
            text[i] = (isalpha(ch)?upcase(ch):ch);
        }
        text[i] = '\0'; // terminate the string properly.
    }

    // Assign alphabet to one dimension of text frequency array and a counter to the other dimension
    char textfreq[ALEN][2];
    for (unsigned int i = 0; i < ALEN; i++) {
        textfreq[i][0] = ALPHABET[i];
        textfreq[i][1] = 0;
    }

    // Count frequency of characters in the given text
    for (unsigned int i = 0; i < strlen(text); i++) {
        for (unsigned int j = 0; j < ALEN; j++) {
            if (text[i] == textfreq[j][0]) textfreq[j][1]+=1;
        }
    }

    //Sort the character frequency array in descending order
    for (unsigned i = 0; i < ALEN-1; i++) {
        for (unsigned j= 0; j < ALEN-i-1; j++) {
            if (textfreq[j][1] < textfreq[j+1][1]) {
                tmpi = textfreq[j][1];
                char tmpc = textfreq[j][0];
                textfreq[j][1] = textfreq[j+1][1];
                textfreq[j][0] = textfreq[j+1][0];
                textfreq[j+1][1] = tmpi;
                textfreq[j+1][0] = tmpc;
            }
        }
    }

    //Map characters to most occurring English characters
    char map[ALEN][2];
    for (unsigned int i = 0; i < ALEN; i++) {
        map[i][0] = CHFREQ[i];
        map[i][1] = textfreq[i][0];
    }

    // Sort the map lexicographically
    for (unsigned int i = 0; i < ALEN-1; i++) {
        for (unsigned int j = 0; j < ALEN-i-1; j++) {
            if (map[j][0] > map[j+1][0]) {
                char tmpc = map[j][0];
                char tmpc2 = map[j][1];
                map[j][0] = map[j+1][0];
                map[j][1] = map[j+1][1];
                map[j+1][0] = tmpc;
                map[j+1][1] = tmpc2;
            }
        }
    }

    char newtext[TEXT_SIZE];
    if(opt == 'd'){
        decode_text(text, newtext, map);
    } else {
        // do option -i
    }

    // Print alphabet and map to stderr and the decoded text to stdout
    fprintf(stderr, "\n%s\n", ALPHABET);
    for (unsigned int i = 0; i < ALEN; i++) {
        fprintf(stderr, "%c", map[i][1]);
    }
    printf("\n%s\n", newtext);

    return 0;
}

Split your code into re-usable functions

I just realised that vnp comment explains this already. nothing to add here.

A few tiny code comments

You don’t need to use malloc in your case.

Also, you don’t need a continue in your decode function : an else would do the trick here.

Attribution
Source : Link , Question Author : Jake Barnby , Answer Author : SylvainD

Leave a Comment