commit 00d7dcfa88c08357d6f9be6b75968565ea872c31 Author: djairoh Date: Fri Feb 16 15:34:28 2024 +0100 assignment 1 diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..a6d3928 --- /dev/null +++ b/Makefile @@ -0,0 +1,12 @@ +all: shell + +shell: + gcc -std=c99 -Wall -pedantic main.c scanner.c shell.c cmd.c -o shell + +bonus: + gcc -std=c99 -Wall -DEXT_PROMPT -pedantic main.c scanner.c shell.c cmd.c -o shell + +clean: + rm -f *~ + rm -f *.o + rm -f shell diff --git a/README.md b/README.md new file mode 100644 index 0000000..6ac6f86 --- /dev/null +++ b/README.md @@ -0,0 +1,157 @@ +# general code structure +The main objective of this assignment was to tokenize given inputs and process the resulting tokens according to the given grammar. We decided to separate the two steps of tokenizing and processing almost entirely, which decouples the input parser from the execution very neatly. The one drawback of this method is that each input is, more or less, processed twice - but we considered this a fair trade-off, as the input for a shell can generally be expected to not be excessively large. + +The crux of our approach lies in a slight addition to the `ListNode` struct: we have added a single `enum Type` attribute which places each token within one of a few categories, roughly mapping to the grammar we are implementing. + +The possible values of the `Type` enum are: +Value | Definition | meaning +--- | --- | --- +EXECUTABLE | any non-[operator](#operators) | A command in either `$CWD` or `$PATH`. +OPTION | any non-[operator](#operators) | Any options to pass to an `EXECUTABLE` or `BUILTIN`. +COMPOSITION | '&&', '\|\|', ';', '\n' | Special characters which signal the end of a chain. +BUILTIN | a [builtin](#builtins) | One of a few pre-defined builtin executables. +PIPELINE | '\|' | A pipeline, indicating the preceding and following commands ought to be piped together. +REDIRECT | '<', '>' | Signals `stdin` and/or `stdout` should be handled differently. 
+BACKGROUND | '&' | Tells the shell to run a given chain as non-blocking (run in the background). +FILENAME | any non-[operator](#operators) | A file (for redirection). + + +### builtins +A builtin for our shell means any specific command which should be handled by the shell internally, as opposed to calling an external executable. +So far, we have implemented: +Builtin | Result +--- | --- +status | prints the return status of the last run command. +exit | exits the shell. +true | sets status to `0`. +false | sets status to `1`. +cd | changes the current working directory. +debug | prints debugging information to `stdout`. + +### operators +An operator is a special token which tells the shell to process the preceding (and following) chain differently. +The following standard operators are (or will be) implemented by this shell: +Operator | Name | Effect +--- | --- | --- +"&" | Background operator | Tells the shell to run a chain in the background. +"&&" | Conditional and | Tells the shell to execute the next chain iff the status of the previous chain equals `0`. +"\|\|" | Conditional or | Tells the shell to execute the next chain iff the status of the previous chain does not equal `0`. +";" | Separator | Tells the shell when a chain ends. +"<" | Redirect in | Tells the shell to take input from a non-`stdin` stream. +">" | Redirect out | Tells the shell to print output to a non-`stdout` stream. +"\|" | Pipe operator | Tells the shell to chain commands together. + + +## processing +After parsing a complete line and assigning each token a Type, the next step of the program is the actual processing. +To aid in this, we have constructed a few different types - defined in `cmd.h` - which model various parts of a typical shell execution. + +The `Command` struct models one executable and its options, arranged in `char *` and `char **` to easily pass them to an `exec()` system call. 
+```c +typedef struct Command { + char **arguments; + char *command; + int capacity; + int numArguments; +} Command; +``` + +The `CommandList` struct models a list of Commands, and will be used when multiple commands are to be executed together (as is the case when piping commands, or using the background operator). +```c +typedef struct CommandList { + Command *commands; + int capacity; + int numCommands; +} CommandList; +``` + +The `Chain` struct models one complete chain to be executed. It is currently a bit barebones, but will be expanded when we implement piping, redirection, and running in the background. +```c +typedef struct Chain { + CommandList commands; + bool runInBackground; +} Chain; +``` + +These structs all come with some library functions for ease of use, which allow us to build up commands and chains, and execute them appropriately. + + +## main +The driver code behind the program lives (obviously) in `main()`. + +//TODO you were writing here + + +# bonuses +We have enhanced the functionality of our shell by implementing a few bonuses. These are described in this section. + +## cd builtin +Any self-respecting shell needs a way to change the current working directory. Luckily for the end-user, we have implemented exactly that! +Use the builtin `cd` to move around the system. We even added support for moving to the `$HOME` directory - accomplished by withholding any command options (just `cd` will get you home). + +Note that this builtin has side-effects, and might therefore edit `status`. If changing directories fails for whatever reason (insufficient permissions, destination doesn't exist, etc), `status` *will* be set to -1. + +This builtin provides interactions like these (lines starting with '>' are input; the '>' was added afterwards): +```shell +>realpath . +/home/djairoh/Nextcloud/opsys/assignments/1/src +>cd /home/djairoh/Documents +>realpath . +/home/djairoh/Documents +>cd +>realpath . 
+/home/djairoh +``` + +## debug mode +To get an insight into how the shell works under the hood, we've added a custom builtin: +with the `debug` flag, all executed commands will be laid out in glorious JSON format. + +This builtin is a toggle, meaning a second use will disable it once more. Provided below is some example input/output with this builtin on. Lines starting with '>' are input (the '>' was manually added after the fact). + +```shell +>debug +Toggled debug to 1. +>echo "hello, world!" +{ +"commandList": { + "commands": [ + { + "arguments": [ + "echo", + "hello, world!", + "(null)" + ], + "command": "echo", + "capacity": 4, + "numArguments": 3 + }], + "capacity": 4, + "numCommands": 1 + }, +"runInBackground": 0 +} +hello, world! +``` + +## shell prompt (coloured) +Standard in any shell is a prompt. These can range from minimalist (the `sh` prompt only shows the current version of the shell), to things as complicated and customizable as the [Starship](https://starship.rs/) prompt - of which an example is provided here: + +![An example of a Starship prompt](./assets/starship.png) + +While we didn't quite get around to this level of complexity, we have implemented a little prompt consisting of three components: + * the username of the person who started the shell + * the current working directory + * the exit status of the last command, indicated by the colour of the '>' (green when it is `0`, red otherwise) + +The end result looks like this: + +![Our own (less impressive) prompt](./assets/prompt.png) + +## true / false +The smallest of our extra implementations, we have added two `builtin`s which directly modify `status`. + + * The builtin `true` will set `status` to `0`. + * The builtin `false` will set `status` to `1`. + + Really this was more of an implementation to allow us to do proper debugging, but it's still something we added to the shell that was not specifically part of the assignment, so we decided we may as well document it here. 
diff --git a/assets/cd.png b/assets/cd.png new file mode 100644 index 0000000..2c23c06 Binary files /dev/null and b/assets/cd.png differ diff --git a/assets/prompt.png b/assets/prompt.png new file mode 100644 index 0000000..4f85f50 Binary files /dev/null and b/assets/prompt.png differ diff --git a/assets/starship.png b/assets/starship.png new file mode 100644 index 0000000..9c6b19b Binary files /dev/null and b/assets/starship.png differ diff --git a/cmd.c b/cmd.c new file mode 100644 index 0000000..5949947 --- /dev/null +++ b/cmd.c @@ -0,0 +1,300 @@ +#include "cmd.h" +#include +#include +#include +#include +#include +#include "scanner.h" + +/** + * This function instantiates an empty command. + */ +Command _newCommand() { + Command cmd; + cmd.capacity = INITIAL_ARRAY_SIZE; + cmd.arguments = calloc(INITIAL_ARRAY_SIZE, sizeof(char**)); + cmd.numArguments = 0; + return cmd; +} + +/** + * This function instantiates an empty commandlist. + */ +CommandList _newCommandList() { + CommandList list; + list.capacity = INITIAL_ARRAY_SIZE; + list.commands = calloc(INITIAL_ARRAY_SIZE, sizeof(Command)); + list.numCommands = 0; + return list; +} + +/** + * This function instantiates an empty chain. + */ +Chain _newChain() { + Chain chain; + chain.commands = _newCommandList(); + chain.runInBackground = false; + return chain; +} + +/** + * This function frees a command. + * Note that it only frees the char**, not the strings/char*s themselves, as these are part of the TokenList. + */ +void freeCommand(Command cmd) { + free(cmd.arguments); +} + +/** + * This function frees a commandlist. + * To do so, it frees all commands in .numCommands + */ +void _freeCommandList(CommandList list) { + for (int i=0; i < list.numCommands; i++) { + freeCommand(list.commands[i]); + } + free(list.commands); +} + +// This function frees a chain. +void freeChain(Chain chain) { + _freeCommandList(chain.commands); +} + + +/** + * This function inserts a given argument into a given command. 
+ * @param cmd the command to insert the argument into. + * @param str the argument to be inserted (may be NULL, which buildCommand uses to terminate the list for execvp()). + */ +void insertArgument_(Command *cmd, char* str) { + //resize arguments pointer if necessary. + if (cmd->numArguments >= cmd->capacity) { + // The array stores char* entries, so grow it by the element size (not sizeof(Command)), + // and only replace the old pointer once realloc is known to have succeeded. + char **grown = realloc(cmd->arguments, 2*cmd->capacity*sizeof(*cmd->arguments)); + if (grown == NULL) exit(1); + cmd->arguments = grown; + cmd->capacity *= 2; + } + cmd->arguments[cmd->numArguments++] = str; +} + +/** + * This function builds a command from a given listPointer. + * It expects the current listPointer to be either of type EXECUTABLE or BUILTIN, + * followed by any number of OPTIONs. + * The argument strings are not copied: the Command points into the token list. + * @param lp the list to read from; advanced past the command and its options. + * @return a new Command. + */ +Command buildCommand(List *lp) { + Command cmd = _newCommand(); + + //exit on invalid syntax (including an exhausted list). This should not be reachable, if our parser is correct. + if (*lp == NULL || ((*lp)->type != EXECUTABLE && (*lp)->type != BUILTIN)) exit(1); + + // insert command as first entry + insertArgument_(&cmd, (*lp)->t); + *lp = (*lp)->next; + + //insert rest of arguments + while (*lp != NULL && (*lp)->type == OPTION) { + insertArgument_(&cmd, (*lp)->t); + *lp = (*lp)->next; + } + + // execvp() expects a NULL terminated list of options. + insertArgument_(&cmd, NULL); + + //for ease of access we define the command attribute as pointing towards the first arguments entry (which contains the command to be executed) + cmd.command = cmd.arguments[0]; + return cmd; +} + +/** + * This function inserts a command into a given commandList. + * @param cmd the Command to insert. + * @param list the commandList to insert the command into. + */ +void _insertCommand(Command cmd, CommandList *list) { + //resize commands pointer if necessary. + if (list->numCommands >= list->capacity) { + // Keep the old pointer until realloc is known to have succeeded. + Command *grown = realloc(list->commands, 2*list->capacity*sizeof(*list->commands)); + if (grown == NULL) exit(1); + list->commands = grown; + list->capacity *= 2; + } + list->commands[list->numCommands++] = cmd; +} + +/** + * This function builds up one chain of commands from a given listPointer. 
+ * It parses one 'unit' of executables, meaning any number of commands+options chained together by pipes and redirects. + * It closely models the given grammar for a chain, with the exception of also parsing the background operator '&'. (and it doesn't like builtins either, we handle those separately). + * Redirection is not implemented yet: a REDIRECT operator and the FILENAME that follows it are consumed and ignored. + * @param lp the list to construct a chain from; advanced past the tokens consumed for this chain (a terminating COMPOSITION operator is left in place). + * @return a new Chain. + */ +Chain buildChain(List *lp) { + Chain chain = _newChain(); + Command cmd; + + //exit on invalid syntax. This should not be reachable, if our parser is correct. + if ((*lp) == NULL || (*lp)->type != EXECUTABLE) exit(1); + + // so long as we have tokens to parse, do so. We exit from within the loop if: + // we encounter a syntax error (OPTION, BUILTIN, or FILENAME - these should be consumed as we construct the Chain) or + // we encounter a COMPOSITION operator - meaning this chain is complete, and the remainder of lp is to be processed elsewhere. + while ((*lp) != NULL) { + switch ((*lp)->type) { + // finding a command means we must insert it into the commandList. + case EXECUTABLE: + cmd = buildCommand(lp); + _insertCommand(cmd, &chain.commands); + break; + + //finding a chain operator means we want to continue evaluating, so we consume the operator. + case PIPELINE: + *lp = (*lp)->next; + break; + + // finding a redirect means a bunch of things we haven't thought out yet. + // Until it is implemented we must still consume the operator and its FILENAME: + // leaving *lp untouched here made this loop spin forever on any input containing '<' or '>'. + case REDIRECT: + //TODO: implement redirect + *lp = (*lp)->next; + if (*lp != NULL && (*lp)->type == FILENAME) { + *lp = (*lp)->next; + } + break; + + // finding a background operator means two things: + // 1. we must run this chain in the background. + // 2. we have reached the end of this chain. + case BACKGROUND: + chain.runInBackground = true; + *lp = (*lp)->next; + /* fallthrough: the chain is complete, return it just like for COMPOSITION. */ + + // finding a COMPOSITION operator indicates we reached the end of this chain. + case COMPOSITION: + return chain; + + // finding any of an OPTION, BUILTIN, or FILENAME means a syntax error (and thus a faulty parser)- these should not be reachable. 
+ case OPTION: + case BUILTIN: + case FILENAME: + freeChain(chain); + exit(1); + } + } + return chain; +} + +/** + * This function executes a given command in a child process. + * To make this a blocking operation, the parent process will wait until the child exits - simultaneously updating the status variable. + * @param cmd the Command to execute. + * @param status variable which will contain the exit status of the command: + * the child's exit code, 127 if execvp() failed, 128 + signal number if the child was killed by a signal, + * or -1 if the child could not be created or waited for. + */ +void _executeCommand(Command cmd, int* status) { + pid_t pid = fork(); + + switch (pid) { + // fork failed. + case -1: + printf("ERROR: failed to create child!\n"); + *status = -1; + break; + case 0: + // child process + execvp(cmd.command, cmd.arguments); + //this line is only ever reached if execvp fails (for example, when an executable can't be found). + exit(127); + break; + default:; + // parent process; wait for this specific child and update status. + // The result of waitpid() is checked so that stat is never read uninitialised, + // and a child killed by a signal no longer leaves the previous command's status in place. + int stat; + if (waitpid(pid, &stat, 0) == -1) { + *status = -1; + } else if (WIFEXITED(stat)) { + *status = WEXITSTATUS(stat); + } else if (WIFSIGNALED(stat)) { + *status = 128 + WTERMSIG(stat); + } + } +} + +// TODO: extend to include I/O redirection and background operation +/** + * This function executes all commands in a given chain. + * It updates the status as it does so. + * This function is barebones at the moment, but will be expanded to include support for redirection and non-blocking execution. + * @param chain the Chain to execute. + * @param status variable which will contain the exit status after execution. +*/ +void executeChain(Chain chain, int* status) { + for (int i=0; i < chain.commands.numCommands; i++) { + _executeCommand(chain.commands.commands[i], status); + } +} + +/** + * This function executes a BUILTIN. + * BUILTINs are pre-defined commands ran within the shell itself, as opposed to executables found in $PATH or $CWD. + * @param cmd the BUILTIN to execute. + * @param status the return status of the last run command. + * @return a boolean which indicates whether to keep running the shell or not. 
+ */ +#if EXT_PROMPT +bool executeBuiltin(Command cmd, int *status, bool *debug) { +#else +bool executeBuiltin(Command cmd, int *status) { +#endif + if (!strcmp(cmd.command, "status")) { + printf("The most recent exit code is: %d.\n", *status); + } else if (!strcmp(cmd.command, "exit")) { + return false; + } else if (!strcmp(cmd.command, "true")) { + *status = 0; + } else if (!strcmp(cmd.command, "false")) { + *status = 1; + #if EXT_PROMPT + } else if (!strcmp(cmd.command, "debug")) { + *debug = ! *debug; + printf("Toggled debug to %d.\n", *debug); + } else if (!strcmp(cmd.command, "cd")) { + // numArguments counts the command itself and the terminating NULL, so 2 means "cd" without a target. + if (cmd.numArguments == 2) { + char *home = getenv("HOME"); + // HOME may be unset; passing NULL to chdir() is invalid, so report failure instead. + *status = (home != NULL) ? chdir(home) : -1; + } else { + *status = chdir(cmd.arguments[1]); + } + #endif + }// can be expanded by growing the if/else chain. + return true; +} + +/** + * This function prints one Command in json. + * Intended to be used in tandem with printChain and printCommandList. + * The NULL entry that terminates the argument list is printed as "(null)". + * @param cmd the command to print. + */ +void _printCommand(Command cmd) { + printf("{\n \"arguments\": [\n"); + for (int i=0; i < cmd.numArguments; i++) { + // Passing NULL to %s is undefined behaviour, so spell the placeholder out explicitly. + printf(" \"%s\"", cmd.arguments[i] != NULL ? cmd.arguments[i] : "(null)"); + if (i < cmd.numArguments-1) printf(",\n"); + } + printf("\n ],\n \"command\": \"%s\",\n \"capacity\": %d,\n \"numArguments\": %d\n }", cmd.command, cmd.capacity, cmd.numArguments); +} + +/** + * This function prints a commandList in json. + * Intended to be used with printCommand and printChain. + * @param list the list to print. + */ +void _printCommandList(CommandList list) { + printf("{\n \"commands\": [\n "); + for (int i=0; i < list.numCommands; i++) { + _printCommand(list.commands[i]); + if (i < list.numCommands-1) printf(", "); + } + printf("],\n \"capacity\": %d,\n \"numCommands\": %d\n }", list.capacity, list.numCommands); +} + +/** + * This function prints a Chain in json. + * Intended to be used with printCommand and printCommandList. + * @param chain the chain to print. 
+ */ +void printChain(Chain chain) { + printf("{\n\"commandList\": "); + _printCommandList(chain.commands); + printf(",\n\"runInBackground\": %d\n}\n", chain.runInBackground); +} diff --git a/cmd.h b/cmd.h new file mode 100644 index 0000000..cfa7d33 --- /dev/null +++ b/cmd.h @@ -0,0 +1,49 @@ +#ifndef CMD_H +#define CMD_H + +#include +#include "scanner.h" +#define INITIAL_ARRAY_SIZE 4 + +// models one executable + options +// thanks to the use of pointers, not as wasteful of memory as it might at first appear. +// this struct makes it much easier to pass a TokenList to an exec() system call. +typedef struct Command { + char **arguments; + char *command; + int capacity; + int numArguments; +} Command; + +// models a list of commands +// used to properly handle piping. +typedef struct CommandList { + Command *commands; + int capacity; + int numCommands; +} CommandList; + +// models one complete chain of numCommands +// For lab1, somewhat barebones. +// Will be expanded upon when we implement redirection/background processes/piping +typedef struct Chain { + CommandList commands; + bool runInBackground; +} Chain; + + +void freeCommand(Command cmd); +void freeChain(Chain chain); + +Command buildCommand(List *lp); +Chain buildChain(List *lp); + +void executeChain(Chain chain, int* status); +#if EXT_PROMPT +bool executeBuiltin(Command cmd, int *status, bool *debug); +#else +bool executeBuiltin(Command cmd, int *status); +#endif + +void printChain(Chain chain); +#endif diff --git a/main.c b/main.c new file mode 100644 index 0000000..99ea954 --- /dev/null +++ b/main.c @@ -0,0 +1,163 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +#include "scanner.h" +#include "shell.h" +#include "cmd.h" + +// general TODO: +// * properly bind stdin/stdout for piped commands +// * run chains in bg if given '&' +// * manage < and > redirects +// * signal handling +// * builtins - 'kill' + +/** + * This function skips one COMMAND, including redirects and 
pipes (if any). + * This is used to move to the next command when a COMPOSITION operator fails. + * @param lp ListPointer containing remaining Tokens, + */ +void skipCommand(List *lp) { + *lp = (*lp)->next; + while (*lp != NULL) { + switch ((*lp)->type) { + case PIPELINE: + case REDIRECT: + case FILENAME: + case OPTION: + case EXECUTABLE: + case BUILTIN: + *lp = (*lp)->next; + break; + case COMPOSITION: + case BACKGROUND: + *lp = (*lp)->next; + return; + } + } +} + +/** + * This function checks whether the current COMPOSITION operator passes (and thus should execute the next function). + * @param s the COMPOSITION operator. + * @param status the exit status of the last ran command. + * @return whether the COMPOSITION succeeds/whether to execute the next command. + */ +bool compositionPasses(char *s, int status) { + if (!strcmp(s, "&&") && ! status) return true; + else if (!strcmp(s, "||") && status) return true; + else if (s[0] == ';' || s[0] == '\n') return true; + + return false; +} + +/** + * Driver function. Contains the main loop which collects and parses input, and decides what and how to execute + output. + */ +int main(int argc, char *argv[]) { + char *inputLine; + List tokenList; + int status = 0; + bool do_loop = true; + #if EXT_PROMPT + bool debug = false; + char cwd[101], *usr; + #endif + + // Disable buffering so we don't have to deal with out-of-order prints. + setbuf(stdin, NULL); + setbuf(stdout, NULL); + + // The main loop body. Each iteration means one line of input. + // Will run until either EOF or the BUILTIN exit is given. + while (do_loop) { + #if EXT_PROMPT + getcwd(cwd, sizeof(cwd)); + usr = getlogin(); + if (!status) printf("\x1b[33m%s \x1b[36m%s \x1b[32m>\x1b[0m ", usr, cwd); + if ( status) printf("\x1b[33m%s \x1b[36m%s \x1b[31m>\x1b[0m ", usr, cwd); + #endif + inputLine = readInputLine(); + // We have modified the readInputLine function to return NULL on EOF. 
+ if (inputLine == NULL) { + do_loop = false; + break; + } + tokenList = getTokenList(inputLine); + + // Because lists are pointers, we need copies so we can access the list later. + List cpy = tokenList, og = tokenList; + if (parseInputLine(&tokenList) && tokenList == NULL ) { + // Consume the entire tokenList (through cpy). + while (cpy != NULL) { + switch (cpy->type) { + // A BUILTIN needs to be executed slightly differently than a COMMAND: + // BUILTINS cannot be redirected/piped. + case BUILTIN:; + Command cmd = buildCommand(&cpy); + // If execcuteBuiltin returns false, the BUILTIN was 'exit' and we should stop the shell. + #if EXT_PROMPT + if (! executeBuiltin(cmd, &status, &debug)) { + #else + if (!executeBuiltin(cmd, &status)) { + #endif + do_loop = false; + cpy = NULL; + } + freeCommand(cmd); + break; + + // Finding an EXECUTABLE means we need to construct the appropriate chain (in case of redirects/pipes), and then run it. + case EXECUTABLE:; + Chain chain = buildChain(&cpy); + #if EXT_PROMPT + if (debug) printChain(chain); + #endif + executeChain(chain, &status); + freeChain(chain); + // Status 127 is returned when an EXECUTABLE cannot be found. This requires an extra line to stdout. + if (status == 127) { + printf("Error: command not found!\n"); + } + break; + + // Upon finding a COMPOSITION, we check if it succeeds. if yes, we consume the operator and continue. + // If not, we skip the next command entirely. + case COMPOSITION:; + if (compositionPasses(cpy->t, status)) { + cpy = cpy->next; + } else { + skipCommand(&cpy); + } + break; + + // Encountering any of BACKGROUND, OPTION, PIPELINE, REDIRECT, or FILENAME here means either we are dealing with incorrect syntax, + // or a function did not correctly consume tokens. 
+ case BACKGROUND: + case OPTION: + case PIPELINE: + case REDIRECT: + case FILENAME: + printf("Error: invalid syntax!\n"); + cpy = NULL; + break; + } + } + } else { + // If tokenList was not parsed successfully, the syntax must be incorrect. + printf("Error: invalid syntax!\n"); + } + + // Memory. + free(inputLine); + freeTokenList(og); + } + + return 0; +} diff --git a/scanner.c b/scanner.c new file mode 100644 index 0000000..ad590b7 --- /dev/null +++ b/scanner.c @@ -0,0 +1,203 @@ +#include +#include +#include +#include +#include +#include + +#include "scanner.h" + +/** + * Reads an inputline from stdin. + * A line ends at a newline outside double quotes, or at end of input. + * @return a heap-allocated string containing the inputline (without the newline), which the caller must free; + * NULL when end of input is reached before any character was read, or inside an unclosed quote. + */ +char *readInputLine() { + int strLen = INITIAL_STRING_SIZE; + int c = getchar(); + int i = 0; + + // part of EOF handling; return NULL when EOF is encountered at the start of input. + // this is further handled in main(). + if (c == EOF) { + return NULL; + } + + char *s = malloc((strLen + 1) * sizeof(*s)); + assert(s != NULL); + + bool quoteStarted = false; + // Stop at EOF as well as at the newline: without the EOF check, input that ends in mid-line + // never leaves this loop and the buffer grows until the allocation fails. + while (c != EOF && (c != '\n' || quoteStarted)) { // Ensure that newlines in strings are accepted + if (c == '\"') { + quoteStarted = !quoteStarted; + } + s[i++] = c; + + if (i >= strLen) { // Resize the string if necessary + strLen = 2 * strLen; + s = realloc(s, (strLen + 1) * sizeof(*s)); + assert(s != NULL); + } + c = getchar(); + } + + // End of input inside an open quote: the line can never be completed, + // so discard it rather than hand an unbalanced string to the tokenizer. + if (quoteStarted) { + free(s); + return NULL; + } + s[i] = '\0'; + return s; +} + +/** + * The function isOperatorCharacter checks whether the input parameter \param c is an operator. + * @param c input character. + * @return a bool denoting whether \param c is an operator. + */ +bool _isOperatorCharacter(char c) { + return c == '&' || c == '|' || c == ';' || c == '<' || c == '>'; +} + +/** + * Reads an identifier in string \param s starting at index \param start. + * @param s input string. + * @param start starting index in string \param s. 
+ * On return \param start is the index of the first character after the identifier. + * @return a pointer to the start of the identifier string (heap-allocated, quotes stripped). + */ +char *_matchIdentifier(char *s, int *start) { + int strLen = INITIAL_STRING_SIZE; + int pos = 0, offset = 0; + + char *ident = malloc((strLen + 1) * sizeof(*ident)); + assert(ident != NULL); + + bool quoteStarted = false; + size_t lenS = strlen(s); + // The bound applies unconditionally and excludes the terminator: previously '<=' copied the '\0' into + // the identifier and an open quote switched the bound off, reading past the end of s. + // isspace() needs a value representable as unsigned char, hence the cast. + while ((size_t)(*start + offset) < lenS && (quoteStarted || (!isspace((unsigned char)s[*start + offset]) && !_isOperatorCharacter(s[*start + offset])))) { // Ensure that whitespace in strings is accepted + if (s[*start + offset] == '\"') { // Strip the quotes from the input before storing in the identifier + quoteStarted = !quoteStarted; + offset++; + continue; + } + ident[pos++] = s[*start + offset++]; + if (pos >= strLen) { // Resize the string if necessary + strLen = 2 * strLen; + ident = realloc(ident, (strLen + 1) * sizeof(*ident)); + assert(ident != NULL); + } + } + ident[pos] = '\0'; + *start = *start + offset; + return ident; +} + +/** + * The function newNode makes a new node for the token list and fills it with the token that + * has been read. Precondition: !isspace(a[*ip]). + * The node's type is left unset here; the parser assigns it later. + * @param s input string. + * @param start starting index in string \param s. + * @return a list node that contains the current token. + */ +List _newNode(char *s, int *start) { + List node = malloc(sizeof(*node)); + assert(node != NULL); + node->next = NULL; + node->t = _matchIdentifier(s, start); + return node; +} + +/** + * Reads an operator in string \param s starting at index \param start. + * @param s input string. + * @param start starting index in string \param s. + * @return a pointer to the start of the operator string. 
+ */ +char *_matchOperator(char *s, int *start) { + int strLen = 2; // the operator consists of *at most* 2 characters + int pos = 0, offset = 0; + + char *op = malloc((strLen + 1) * sizeof(*op)); + assert(op != NULL); + + // Never copy more than strLen characters: a run such as "&&&" used to overflow the 3-byte buffer. + // Any remaining operator characters are picked up as the next token. + while (pos < strLen && _isOperatorCharacter(s[*start + offset])) { + op[pos++] = s[*start + offset++]; + } + op[pos] = '\0'; + *start = *start + offset; + return op; +} + +/** + * The function newOperatorNode makes a new operator node for the token list and fills it with the token that + * has been read. Precondition: !isspace(a[*ip]). + * @param s input string. + * @param start starting index in string \param s. + * @return a list node that contains the current token. + */ +List _newOperatorNode(char *s, int *start) { + List node = malloc(sizeof(*node)); + assert(node != NULL); + node->next = NULL; + node->t = _matchOperator(s, start); + return node; +} + +/** + * The function tokenList reads an array and puts the tokens that are read in a list. + * @param s input string. + * @return a pointer to the beginning of the list (NULL when \param s holds no tokens). + */ +List getTokenList(char *s) { + List lastNode = NULL; + List node = NULL; + List tl = NULL; + int i = 0; + int length = strlen(s); + while (i < length) { + if (isspace((unsigned char)s[i])) { // spaces are skipped; isspace() needs a value representable as unsigned char + i++; + }else { + node = _isOperatorCharacter(s[i]) ? _newOperatorNode(s, &i) : _newNode(s, &i); + if (lastNode == NULL) { // there is no list yet + tl = node; + } else { // a list already exists; add current node at the end + (lastNode)->next = node; + } + lastNode = node; + } + } + return tl; +} + +/** + * Checks whether list \param l is empty. + * @param l input list. + * @return a bool denoting whether \param l is empty. + */ +bool isEmpty(List l) { + return l == NULL; +} + +/** + * The function printList prints the tokens in a token list, separated by commas. + * @param li the input list to be printed. 
+ */ +void printList(List li) { + if (li == NULL) return; + printf("(\"%s\", \"%d\")", li->t, li->type); + li = li->next; + while (li != NULL) { + printf("(\"%s\", \"%d\")", li->t, li->type); + li = li->next; + } + printf("\n"); +} + +/** + * The function freeTokenlist frees the memory of the nodes of the list, and of the strings + * in the nodes. + * @param li the starting node of a list. + */ +void freeTokenList(List li) { + if (li == NULL) { + return; + } + free(li->t); + freeTokenList(li->next); + free(li); +} diff --git a/scanner.h b/scanner.h new file mode 100644 index 0000000..68b0496 --- /dev/null +++ b/scanner.h @@ -0,0 +1,37 @@ +#ifndef SCANNER_H +#define SCANNER_H + +#define INITIAL_STRING_SIZE 10 + +// This enum denotes what type of variable a token is. +enum Type { + EXECUTABLE, // a command in either $CWD or $PATH + OPTION, // any options to pass to an executable or builtin + COMPOSITION, // '&&', '||', ';', '\n' + BUILTIN, // one of a few pre-defined builtin executables + PIPELINE, // '|' + REDIRECT, // '<' or '>' + BACKGROUND, // '&' + FILENAME // a file (for redirection) +}; + +typedef struct ListNode *List; + +typedef struct ListNode { + char *t; + enum Type type; + List next; +} ListNode; + + +char *readInputLine(); + +List getTokenList(char *s); + +bool isEmpty(List l); + +void printList(List l); + +void freeTokenList(List l); + +#endif diff --git a/shell b/shell new file mode 100755 index 0000000..3f2b44e Binary files /dev/null and b/shell differ diff --git a/shell.c b/shell.c new file mode 100644 index 0000000..d6be059 --- /dev/null +++ b/shell.c @@ -0,0 +1,249 @@ +#include +#include + +#include "scanner.h" + +/** + * The function _acceptToken checks whether the current token matches a target identifier. + * if this is the case, it will mark this token as the given Type, then move to the next token. + * @param lp List pointer to the start of the tokenlist. 
+ * @param ident target identifier + * @param type enum describing the nature to assign if the token is accepted. + * @return a bool denoting whether the current token matches the target identifier. + */ +bool _acceptToken(List *lp, char *ident, enum Type type) { + if (*lp != NULL && strcmp(((*lp)->t), ident) == 0) { + (*lp)->type = type; + *lp = (*lp)->next; + return true; + } + return false; +} + +/** + * The function parseExecutable parses an executable. + * @param lp List pointer to the start of the tokenlist. + * @return a bool denoting whether the executable was parsed successfully + * (false when no token is left or the token starts with an operator character). + */ +bool _parseExecutable(List *lp) { + // An exhausted list (for example the input "ls |") is a syntax error, not something to dereference. + if (*lp == NULL) { + return false; + } + + char s = (*lp)->t[0]; + // Right now we only check if the first character is illegal (to prevent an operator token from mistakenly being assigned a COMMAND type.) + switch (s) { + case '|': + case '&': + case ';': + case '\n': + case '>': + case '<': + return false; + } + + (*lp)->type = EXECUTABLE; + *lp = (*lp)->next; + return true; +} + +/** + * Checks whether the current token of \param lp is an operator. + * @param lp List pointer to the current token; *lp must not be NULL. + * @return a bool denoting whether the current token is an operator. + */ +bool _isOperator(List *lp) { + // NULL-terminated array makes it easy to expand this array later + // without changing the code at other places. + char *operators[] = { + "&", + "&&", + "||", + ";", + "<", + ">", + "|", + NULL + }; + + for (int i = 0; operators[i] != NULL; i++) { + if (strcmp((*lp)->t, operators[i]) == 0) { + return true; + } + } + return false; +} + +/** + * The function parseOptions parses options. + * @param lp List pointer to the start of the tokenlist. + * @return a bool denoting whether the options were parsed successfully. 
+ */ +bool _parseOptions(List *lp) { + while (*lp != NULL && !_isOperator(lp)) { + (*lp)->type = OPTION; + (*lp) = (*lp)->next; + } + return true; +} + +/** + * The function parseRedirections parses a command according to the grammar: + * + * ::= + * + * @param lp List pointer to the start of the tokenlist. + * @return a bool denoting whether the command was parsed successfully. + */ +bool _parseCommand(List *lp) { + return _parseExecutable(lp) && _parseOptions(lp); +} + +/** + * The function parsePipeline parses a pipeline according to the grammar: + * + * ::= "|" + * | + * + * @param lp List pointer to the start of the tokenlist. + * @return a bool denoting whether the pipeline was parsed successfully. + */ +bool _parsePipeline(List *lp) { + if (!_parseCommand(lp)) { + return false; + } + + if (_acceptToken(lp, "|", PIPELINE)) { + return _parsePipeline(lp); + } + + return true; +} + +/** + * The function parseFileName parses a filename. + * @param lp List pointer to the start of the tokenlist. + * @return a bool denoting whether the filename was parsed successfully. + */ +bool _parseFilename(List *lp) { + //we run a POSIX compliant system, meaning all characters save '/' and NULL are allowed in filenames. + //NULL is already taken care of by the List library, and / just means the file is located in a directory. + //as a result the only limit we place on filenames are the reserved (operator) characters. + switch ((*lp)->t[0]) { + case '|': + case '&': + case ';': + case '\n': + case '>': + case '<': + return false; + } + (*lp)->type = FILENAME; + *lp = (*lp)->next; + return true; +} + +/** + * The function parseRedirections parses redirections according to the grammar: + * + * ::= + * | + * + * @param lp List pointer to the start of the tokenlist. + * @return a bool denoting whether the redirections were parsed successfully. 
+ */ +bool _parseRedirections(List *lp) { + if (isEmpty(*lp)) { + return true; + } + + if (_acceptToken(lp, "<", REDIRECT)) { + if (!_parseFilename(lp)) return false; + if (_acceptToken(lp, ">", REDIRECT)) return _parseFilename(lp); + else return true; + } else if (_acceptToken(lp, ">", REDIRECT)) { + if (!_parseFilename(lp)) return false; + if (_acceptToken(lp, "<", REDIRECT)) return _parseFilename(lp); + else return true; + } + + return true; +} + +/** + * The function parseBuiltIn parses a builtin. + * @param lp List pointer to the start of the tokenlist. + * @return a bool denoting whether the builtin was parsed successfully. + */ +bool _parseBuiltIn(List *lp) { + // NULL-terminated array makes it easy to expand this array later + // without changing the code at other places. + char *builtIns[] = { + "exit", + "status", + "true", + "false", + #if EXT_PROMPT + "debug", + "cd", + #endif + NULL + }; + + for (int i = 0; builtIns[i] != NULL; i++) { + if (_acceptToken(lp, builtIns[i], BUILTIN)) { + return true; + } + } + + return false; +} + +/** + * The function parseChain parses a chain according to the grammar: + * + * ::= + * | + * + * @param lp List pointer to the start of the tokenlist. + * @return a bool denoting whether the chain was parsed successfully. + */ +bool _parseChain(List *lp) { + if (_parseBuiltIn(lp)) { + return _parseOptions(lp); + } + if (_parsePipeline(lp)) { + return _parseRedirections(lp); + } + return false; +} + +/** + * The function parseInputLine parses an inputline according to the grammar: + * + * ::= & + * | && + * | || + * | ; + * | + * | + * + * @param lp List pointer to the start of the tokenlist. + * @return a bool denoting whether the inputline was parsed successfully. 
+ */ +bool parseInputLine(List *lp) { + if (isEmpty(*lp)) { + return true; + } + + if (!_parseChain(lp)) { + return false; + } + + if (_acceptToken(lp, "&", BACKGROUND) || _acceptToken(lp, "&&", COMPOSITION)) { + return parseInputLine(lp); + } else if (_acceptToken(lp, "||", COMPOSITION)) { + return parseInputLine(lp); + } else if (_acceptToken(lp, ";", COMPOSITION)) { + return parseInputLine(lp); + } + + return true; +} diff --git a/shell.h b/shell.h new file mode 100644 index 0000000..0837c44 --- /dev/null +++ b/shell.h @@ -0,0 +1,9 @@ +#ifndef SHELL_SHELL_H +#define SHELL_SHELL_H + +#include "scanner.h" +#include + +bool parseInputLine(List *lp); + +#endif