/* This application is implemented in the C programming language. The application reads files from the file system and imports them to the SIETS storage through HTTP POST interface using libcurl. The application receives file names as command line arguments. It also detects whether the file is a text file or a binary file by counting whitespaces in it: if a file contains relatively less whitespaces, it is considered to be a binary file, and if a file contains relatively more whitespaces, it is considered to be a text file. */ // include standard headers #include #include #include #include #include #include // libcurl #include // connection parameters char *url = "http://127.0.0.1/cgi-bin/siets/api.cgi"; char *storage = "test"; char *user = "guest"; char *passwd = "guest"; char *encoding = "US-ASCII"; char *post_fmt = "storage=%s&command=insert&user=%s&password=%s&id=%s&title=%s&rate=%d&text=%s&encoding=%s"; #define REQUIRED_WHITESPACE_FRACTION 0.12 typedef struct { int len, used; char *buf; } curl_reply; // callback for reading HTTP response size_t read_reply(void *buffer, size_t size, size_t nmemb, void *userp) { int new_len; curl_reply *r = (curl_reply *) userp; for (new_len = r->len; new_len < r->used + size * nmemb + 1; new_len *= 2); if (new_len > r->len) r->buf = realloc(r->buf, new_len); memcpy(r->buf + r->used, buffer, size * nmemb); r->len = new_len; r->used += size * nmemb; r->buf[r->used] = '\0'; return size * nmemb; } int main(int argc, char *argv[]) { CURL *curl_handle; char *storage_esc, *user_esc, *passwd_esc, *title_esc, *text_esc, *encoding_esc; curl_reply reply; char *err_buf[CURL_ERROR_SIZE]; int i; if (argc == 1) { printf("Usage: [-r url] [-s storage] [-u user] [-p password] [-e encoding] files\n"); return 0; } // read options for (i = 1; i < argc; i++) { if (argv[i][0] == '-') { if (i + 1 >= argc) break; // no option value switch(argv[i][1]) { case 'r': url = argv[i+1]; break; case 's': storage = argv[i+1]; break; case 'u': user = argv[i+1]; break; case 'p': passwd = argv[i+1]; break; case 'e': encoding = argv[i+1]; break; default: printf("Unknown option: %s\n", argv[i]); break; } i++; } } // initialization curl_global_init(CURL_GLOBAL_ALL); curl_handle = curl_easy_init(); curl_easy_setopt(curl_handle, CURLOPT_URL, url); curl_easy_setopt(curl_handle, CURLOPT_WRITEFUNCTION, read_reply); curl_easy_setopt(curl_handle, CURLOPT_WRITEDATA, &reply); curl_easy_setopt(curl_handle, CURLOPT_ERRORBUFFER, err_buf); storage_esc = curl_escape(storage, 0); user_esc = curl_escape(user, 0); passwd_esc = curl_escape(passwd, 0); encoding_esc = curl_escape(encoding, 0); // names of files to be imported are passed as arguments // process each of them for (i = 1; i < argc; i++) { FILE *f; struct stat st; int k, nspaces; char *buf, *post_data; // check if argument is option if (argv[i][0] == '-') { if (i + 1 >= argc) break; // no option value i++; continue; } printf("Reading file: '%s'\n", argv[i]); // open file f = fopen(argv[i], "r"); if (f) { // retrieve file information if (fstat(fileno(f), &st) == 0) { if (S_ISREG(st.st_mode)) { printf("\tSize: %d bytes\n", st.st_size); // read all of it into memory // note: this sample program asumes all of file fits into memory // so if you need to work with larger files figure out something else buf = (char *) malloc(st.st_size + 1); k = fread(buf, 1, st.st_size, f); if (k == st.st_size) { buf[k] = '\0'; // see if it is text file // estimate that by counting whitespace in it: // natural language text in contrary to binary data // must contain significant portion of whitespace nspaces = 0; for (k = 0; k < st.st_size; k++) { if (isspace(buf[k])) nspaces++; } if (nspaces >= st.st_size * REQUIRED_WHITESPACE_FRACTION) { // execute SIETS insert command through HTTP POST interface title_esc = curl_escape(argv[i], 0); text_esc = curl_escape(buf, k); post_data = malloc(strlen(storage_esc) + strlen(user_esc) + strlen(passwd_esc) + strlen(post_fmt) + 2 * strlen(title_esc) + 20 + strlen(text_esc) + strlen(encoding_esc)); sprintf(post_data, post_fmt, storage_esc, user_esc, passwd_esc, title_esc, title_esc, 100, text_esc, encoding_esc); curl_easy_setopt(curl_handle, CURLOPT_POSTFIELDS, post_data); reply.buf = malloc(reply.len = 1); reply.used = 0; if (curl_easy_perform(curl_handle) != CURLE_OK) { fprintf(stderr, "Error connecting to SIETS server: %s\n", err_buf); } else if (strstr(reply.buf, "")) { // simplified error check *((char *) strstr(reply.buf, "")) = '\0'; fprintf(stderr, "Error returned from SIETS server: %s\n", strstr(reply.buf, "") + 6); } else { *((char *) strstr(reply.buf, "")) = '\0'; printf("Document inserted with id %s\n", strstr(reply.buf, "") + 7); } free(reply.buf); free(title_esc); free(text_esc); free(post_data); } else { printf("\tBinary file: ignored\n"); } } else { fprintf(stderr, "Error reading file\n"); } free(buf); } else { fprintf(stderr, "File '%s' is not a regular file\n", argv[i]); } } else { fprintf(stderr, "Filesystem error retrieving info on '%s'\n", argv[i]); } fclose(f); } else { fprintf(stderr, "Couldn't open file '%s'\n", argv[i]); } } // final cleanup free(storage_esc); free(user_esc); free(passwd_esc); free(encoding_esc); curl_easy_cleanup(curl_handle); curl_global_cleanup(); return 0; }