..

[Lab Report] MIT 6.S081 Lab: Unix utilities (v2022)


sleep

This one is trivial:

#include "kernel/types.h"
#include "kernel/stat.h"
#include "user/user.h"

int main(int argc, char* argv[]) {
    if (argc != 2) {
        fprintf(2, "Usage: sleep [time]\n");
        exit(1);
    }

    sleep(atoi(argv[1]));
    exit(0);
}

Remember to add sleep to UPROGS in Makefile:

UPROGS=\
	...
	$U/_zombie\
	$U/_sleep\

pingpong

This one is also trivial:

#include "kernel/types.h"
#include "kernel/stat.h"
#include "user/user.h"

int main(int argc, char* argv[]) {
    int p2c[2], c2p[2]; 
    pipe(p2c); // parent-to-child pipe
    pipe(c2p); // child-to-parent pipe
    char buf[1];

    if (fork() == 0) { // child process
        // child write, parent read
        close(c2p[0]);
        close(p2c[1]);
        read(p2c[0], buf, 1);
        close(p2c[0]);
        printf("%d: received ping\n", getpid());
        write(c2p[1], buf, 1);
        close(c2p[1]);
    } else { // parent process
        // parent write, child read
        close(p2c[0]);
        close(c2p[1]);
        write(p2c[1], buf, 1);
        close(p2c[1]);
        read(c2p[0], buf, 1);
        printf("%d: received pong\n", getpid());
        close(c2p[0]);
    }
    exit(0);
}

Remember to add pingpong to UPROGS in Makefile:

UPROGS=\
	...
	$U/_sleep\
	$U/_pingpong\

primes

The pipeline of processes we would like to simulate can be visualized as the follows:

sieve.gif

The pipeline works like this: during each sieve process, the smallest prime number found in the current input is printed, and all multiples of that prime within the input are filtered out. The remaining numbers are then passed on to the next process. These processes continue until all numbers have been sieved out.

Implementing the pipeline is a straightforward task:

#include "kernel/types.h"
#include "kernel/stat.h"
#include "user/user.h"

// A single call to the child function is a sieve process that retrieves and outputs a prime number, x, from pLeft, and filters out all multiples of x.
// Simultaneously, the remaining numbers are passed on to the next sieve process for processing.
void child(int pLeft[2]) {
    close(pLeft[1]);
    int x;
    if (read(pLeft[0], &x, sizeof(x)) == 0) { // nothing is read
        exit(0);
    }

    int pRight[2];
    pipe(pRight);

    if (fork() == 0) {
        child(pRight);
    } else {
        close(pRight[0]);
        printf("prime %d\n", x);
        int num;
        while (read(pLeft[0], &num, sizeof(num)) != 0) {
            if (num % x != 0) {
                write(pRight[1], &num, sizeof(num));
            }
        }
        close(pLeft[0]);
        close(pRight[1]);
        wait(0);
    } 
    exit(0);
}

int main(int argc, char* argv[]) {
    int p[2];
    pipe(p);

    if (fork() == 0) {
        child(p);
    } else {
        close(p[0]);
        for (int i = 2; i <= 35; ++i) {
            write(p[1], &i, sizeof(i));
        }
        close(p[1]);
        wait(0);
    }
    exit(0);
}

Some side notes for the hint “Be careful to close file descriptors that a process doesn’t need, because otherwise your program will run xv6 out of resources before the first process reaches 35”:

  • The reason here is that when forking, all file descriptors of the parent process are copied into the child process. However, in xv6, each process can only have up to 16 file descriptors open. This can be shown in kernel/param.h and kernel/proc.h

    // kernel/param.h
    #define NOFILE       16  // open files per process
      
    // kernel/proc.h
    struct proc {
      ...
      struct file *ofile[NOFILE];  // Open files
      ...
    };
    
  • Since a pipeline opens both an input file and an output file, it occupies 2 file descriptors. As a result, when the child process created by forking copies the parent process’s file descriptors, the 16-file-descriptor limit is reached by the time the a certain layer of pipeline is reached. This causes the creation of new pipelines to fail.

find

Given that find follows a similar pattern to ls, making some minor modifications to ls would suffice:

#include "kernel/types.h"
#include "kernel/stat.h"
#include "user/user.h"
#include "kernel/fs.h"

void find(char* path, char* target) {
    char buf[512], *p;
    int fd;
    struct dirent de;
    struct stat st;

    if ((fd = open(path, 0)) < 0) {
        fprintf(2, "find: cannot open %s\n", path);
        return;
    }

    if (fstat(fd, &st) < 0) {
        fprintf(2, "find: cannot stat %s\n", path);
        close(fd);
        return;
    }

    switch (st.type) {
    case T_FILE:
        if (strcmp(path + strlen(path) - strlen(target), target) == 0) {
            printf("%s\n", path);
        }
        break;
    case T_DIR:
        if (strlen(path) + 1 + DIRSIZ + 1 > sizeof buf) {
            printf("find: path too long\n");
            break;
        }
        strcpy(buf, path);
        p = buf + strlen(buf);
        *p++ = '/'; // a path starts with "/"
        while (read(fd, &de, sizeof(de)) == sizeof(de)) {
            if (de.inum == 0)
                continue;
            memmove(p, de.name, DIRSIZ);
            p[DIRSIZ] = 0;
            if (stat(buf, &st) < 0) {
                printf("find: cannot stat %s\n", buf);
                continue;
            }
            // do not recurse into "." and ".."
            if ((strcmp(de.name, ".") != 0) && (strcmp(de.name, "..") != 0)) {
                find(buf, target);
            }
        }
        break;
    }
    close(fd);
}

int main(int argc, char* argv[]) {
    if (argc < 3) {
        exit(1);
    }

    char* target_dir = argv[1];
    char* target_file = argv[2];
    find(target_dir, target_file);
    exit(0);
}

xargs

Simply append each line to the program arguments. Once all the arguments have been appended, execute the program with the complete set of arguments:

#include "kernel/types.h"
#include "kernel/stat.h"
#include "user/user.h"
#include "kernel/param.h"

#define MAXARGLEN 64

int main(int argc, char* argv[]) {
    char params[MAXARG][MAXARGLEN];
    char *processed_params[MAXARG];
    char buf[1];

    while (1) {
        memset(params, 0, MAXARG * MAXARGLEN);
        for (int i = 1; i < argc; ++i) {
            strcpy(params[i-1], argv[i]);
        }

        int curr_arg = argc - 1, arg_offset = 0;
        int read_res;
        while ((read_res = read(0, buf, 1)) != 0) {
            if (buf[0] == ' ') {
                curr_arg++;
                arg_offset = 0;
                continue;
            } else if (buf[0] == '\n') {
                break;
            }
            params[curr_arg][arg_offset++] = buf[0];
        }

        // stop reading lines when encountering EOF
        if (read_res == 0) {
            break;
        }

        for (int i = 0; i <= curr_arg; ++i) {
            processed_params[i] = params[i];
        }
        if (fork() == 0) {
            exec(argv[1], processed_params);
            exit(1);
        } else {
            wait(0);
        }
    }
    
    exit(0);
}