Codebase list dillo / upstream/2149_9d1ff8be799c dpi / datauri.c
upstream/2149_9d1ff8be799c

Tree @upstream/2149_9d1ff8be799c (Download .tar.gz)

datauri.c @upstream/2149_9d1ff8be799craw · history · blame

/*
 * File: datauri.c
 *
 * Copyright (C) 2006-2007 Jorge Arellano Cid <jcid@dillo.org>
 *
 * Filter dpi for the "data:" URI scheme (RFC 2397).
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 */

#include <unistd.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include <errno.h>

#include "../dpip/dpip.h"
#include "dpiutil.h"

/*
 * Debugging macros
 */
#define SILENT 1
#define _MSG(...)
#if SILENT
 #define MSG(...)
#else
 #define MSG(...)  fprintf(stderr, "[datauri dpi]: " __VA_ARGS__)
#endif

/*
 * Global variables
 */
static Dsh *sh = NULL;

static void b64strip_illegal_chars(unsigned char* str)
{
   unsigned char *p, *s = str;

   MSG("len=%d{%s}\n", strlen((char*)str), str);

   for (p = s; (*p = *s); ++s) {
      if (isalnum(*p) || strchr("+/=", *p))
         ++p;
   }

   MSG("len=%d{%s}\n", strlen((char *)str), str);
}

static int b64decode(unsigned char* str)
{
   unsigned char *cur, *start;
   int d, dlast, phase;
   unsigned char c;
   static int table[256] = {
      -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,  /* 00-0F */
      -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,  /* 10-1F */
      -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,62,-1,-1,-1,63,  /* 20-2F */
      52,53,54,55,56,57,58,59,60,61,-1,-1,-1,-1,-1,-1,  /* 30-3F */
      -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,  /* 40-4F */
      15,16,17,18,19,20,21,22,23,24,25,-1,-1,-1,-1,-1,  /* 50-5F */
      -1,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,  /* 60-6F */
      41,42,43,44,45,46,47,48,49,50,51,-1,-1,-1,-1,-1,  /* 70-7F */
      -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,  /* 80-8F */
      -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,  /* 90-9F */
      -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,  /* A0-AF */
      -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,  /* B0-BF */
      -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,  /* C0-CF */
      -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,  /* D0-DF */
      -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,  /* E0-EF */
      -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1   /* F0-FF */
   };

   d = dlast = phase = 0;
   start = str;
   for (cur = str; *cur != '\0'; ++cur ) {
      // jer: treat line endings as physical breaks.
      //if (*cur == '\n' || *cur == '\r'){phase = dlast = 0; continue;}
      d = table[(int)*cur];
      if (d != -1) {
         switch(phase) {
         case 0:
            ++phase;
            break;
         case 1:
            c = ((dlast << 2) | ((d & 0x30) >> 4));
            *str++ = c;
            ++phase;
            break;
         case 2:
            c = (((dlast & 0xf) << 4) | ((d & 0x3c) >> 2));
            *str++ = c;
            ++phase;
            break;
         case 3:
            c = (((dlast & 0x03 ) << 6) | d);
            *str++ = c;
            phase = 0;
            break;
         }
         dlast = d;
      }
   }
   *str = '\0';
   return str - start;
}

/* Modified from src/url.c --------------------------------------------------*/

/*
 * Given an hex octet (e.g., e3, 2F, 20), return the corresponding
 * character if the octet is valid, and -1 otherwise
 */
static int Url_decode_hex_octet(const char *s)
{
   int hex_value;
   char *tail, hex[3];

   if (s && (hex[0] = s[0]) && (hex[1] = s[1])) {
      hex[2] = 0;
      hex_value = strtol(hex, &tail, 16);
      if (tail - hex == 2)
         return hex_value;
   }
   return -1;
}

/*
 * Parse possible hexadecimal octets in the URI path.
 * Returns a new allocated string.
 */
char *a_Url_decode_hex_str(const char *str, size_t *p_sz)
{
   char *new_str, *dest;
   int i, val;

   if (!str) {
      *p_sz = 0;
      return NULL;
   }

   dest = new_str = dNew(char, strlen(str) + 1);
   for (i = 0; str[i]; i++) {
      *dest++ = (str[i] == '%' && (val = Url_decode_hex_octet(str+i+1)) >= 0) ?
                i+=2, val : str[i];
   }
   *dest = 0;

   new_str = dRealloc(new_str, sizeof(char) * (dest - new_str + 1));
   *p_sz = (size_t)(dest - new_str);
   return new_str;
}

/* end ----------------------------------------------------------------------*/

/*
 * Send decoded data to dillo in an HTTP envelope.
 */
static void send_decoded_data(const char *url, const char *mime_type,
                              unsigned char *data, size_t data_sz)
{
   char *d_cmd;

   /* Send dpip tag */
   d_cmd = a_Dpip_build_cmd("cmd=%s url=%s", "start_send_page", url);
   a_Dpip_dsh_write_str(sh, 1, d_cmd);
   dFree(d_cmd);

   /* Send HTTP header. */
   a_Dpip_dsh_write_str(sh, 0, "Content-type: ");
   a_Dpip_dsh_write_str(sh, 0, mime_type);
   a_Dpip_dsh_write_str(sh, 1, "\n\n");

   /* Send message */
   a_Dpip_dsh_write(sh, 0, (char *)data, data_sz);
}

static void send_failure_message(const char *url, const char *mime_type,
                                 unsigned char *data, size_t data_sz)
{
   char *d_cmd;
   char buf[1024];

   const char *msg =
"<!DOCTYPE HTML PUBLIC '-//W3C//DTD HTML 4.01 Transitional//EN'>\n"
"<html><body>\n"
"<hr><h1>Datauri dpi</h1><hr>\n"
"<p><b>Can't parse datauri:</b><br>\n";
   const char *msg_mime_type="text/html";

   /* Send dpip tag */
   d_cmd = a_Dpip_build_cmd("cmd=%s url=%s", "start_send_page", url);
   a_Dpip_dsh_write_str(sh, 1, d_cmd);
   dFree(d_cmd);

   /* Send HTTP header. */
   a_Dpip_dsh_write_str(sh, 0, "Content-type: ");
   a_Dpip_dsh_write_str(sh, 0, msg_mime_type);
   a_Dpip_dsh_write_str(sh, 1, "\n\n");

   /* Send message */
   a_Dpip_dsh_write_str(sh, 0, msg);

   /* send some debug info */
   snprintf(buf, 1024, "mime_type: %s<br>data size: %d<br>data: %s<br>",
            mime_type, (int)data_sz, data);
   a_Dpip_dsh_write_str(sh, 0, buf);

   /* close page */
   a_Dpip_dsh_write_str(sh, 0, "</body></html>");
}

/*
 * Get mime type from the data URI.
 * TODO: there's no point in handling "charset" because current dillo
 * only handles ISO-LATIN-1. The FLTK2 version (utf-8) could use it in the
 * future.
 */
static char *datauri_get_mime(char *url)
{
   char buf[256];
   char *mime_type = NULL, *p;
   size_t len = 0;

   if (dStrncasecmp(url, "data:", 5) == 0) {
      if ((p = strchr(url, ',')) && p - url < 256) {
         url += 5;
         len = p - url;
         strncpy(buf, url, len);
         buf[len] = 0;
         /* strip ";base64" */
         if (len >= 7 && dStrcasecmp(buf + len - 7, ";base64") == 0) {
            len -= 7;
            buf[len] = 0;
         }
      }

      /* that's it, now handle omitted types */
      if (len == 0) {
         mime_type = dStrdup("text/plain;charset=US-ASCII");
      } else if (!dStrncasecmp(buf, "charset", 7)) {
         mime_type = dStrconcat("text/plain", buf, NULL);
      } else {
         mime_type = dStrdup(buf);
      }
   }

   return mime_type;
}

/*
 * Return a decoded data string.
 */
static unsigned char *datauri_get_data(char *url, size_t *p_sz)
{
   char *p;
   int is_base64 = 0;
   unsigned char *data = NULL;

   if ((p = strchr(url, ',')) && p - url >= 12 &&  /* "data:;base64" */
       dStrncasecmp(p - 7, ";base64", 7) == 0) {
      is_base64 = 1;
   }

   if (p) {
      ++p;
      if (is_base64) {
         data = (unsigned char *)Unescape_uri_str(p);
         b64strip_illegal_chars(data);
         *p_sz = (size_t) b64decode(data);
      } else {
         data = (unsigned char *)a_Url_decode_hex_str(p, p_sz);
      }
   } else {
      data = (unsigned char *)dStrdup("");
      *p_sz = 0;
   }

   return data;
}

/*
 *
 */
int main(void)
{
   char *dpip_tag = NULL, *cmd = NULL, *url = NULL, *mime_type;
   unsigned char *data;
   int rc;
   size_t data_size = 0;

   /* Initialize the SockHandler */
   sh = a_Dpip_dsh_new(STDIN_FILENO, STDOUT_FILENO, 8*1024);

   rc = chdir("/tmp");
   if (rc == -1) {
      MSG("paths: error changing directory to /tmp: %s\n",
          dStrerror(errno));
   }

   /* Authenticate our client... */
   if (!(dpip_tag = a_Dpip_dsh_read_token(sh, 1)) ||
       a_Dpip_check_auth(dpip_tag) < 0) {
      MSG("can't authenticate request: %s\n", dStrerror(errno));
      a_Dpip_dsh_close(sh);
      return 1;
   }
   dFree(dpip_tag);

   /* Read the dpi command from STDIN */
   dpip_tag = a_Dpip_dsh_read_token(sh, 1);
   MSG("[%s]\n", dpip_tag);

   cmd = a_Dpip_get_attr(dpip_tag, "cmd");
   url = a_Dpip_get_attr(dpip_tag, "url");
   if (!cmd || !url) {
      MSG("Error, cmd=%s, url=%s\n", cmd, url);
      exit (EXIT_FAILURE);
   }

   /* Parse the data URI */
   mime_type = datauri_get_mime(url);
   data = datauri_get_data(url, &data_size);

   MSG("mime_type: %s\n", mime_type);
   MSG("data_size: %d\n", (int)data_size);
   MSG("data: {%s}\n", data);

   if (mime_type && data) {
      /* good URI */
      send_decoded_data(url, mime_type, data, data_size);
   } else {
      /* malformed URI */
      send_failure_message(url, mime_type, data, data_size);
   }

   dFree(data);
   dFree(mime_type);
   dFree(url);
   dFree(cmd);
   dFree(dpip_tag);

   /* Finish the SockHandler */
   a_Dpip_dsh_close(sh);
   a_Dpip_dsh_free(sh);

   return 0;
}