Codebase list dillo / debian/0.8.6-3 dpi / datauri.c
debian/0.8.6-3

Tree @debian/0.8.6-3 (Download .tar.gz)

datauri.c @debian/0.8.6-3raw · history · blame

/*
 * File: datauri.c
 *
 * Copyright (C) 2006 Jorge Arellano Cid <jcid@dillo.org>
 *
 * Filter dpi for the "data:" URI scheme (RFC 2397).
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 */

#include <unistd.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <glib.h>

#include "../dpip/dpip.h"
#include "dpiutil.h"

/*
 * Debugging macros
 *
 * _MSG() expands to nothing, so a call can be silenced by renaming it.
 * MSG() prints through g_printerr(), prefixing every message with
 * "[datauri dpi]: ".  Both rely on the GNU named-variadic-macro syntax
 * (fmt...), so the first argument must be a string literal.
 */
#define _MSG(fmt...)
#define MSG(fmt...)  g_printerr("[datauri dpi]: " fmt)

/*
 * Global variables
 */
/* Socket handler shared by the send_* functions; created in main() on
 * top of stdin/stdout and released there before exit. */
static SockHandler *sh = NULL;



/*
 * Map one character of the base64 alphabet to its 6-bit value.
 * Returns -1 for every other byte (whitespace, '=' padding, ...).
 */
static int b64_value(unsigned char ch)
{
    if (ch >= 'A' && ch <= 'Z')
        return ch - 'A';
    if (ch >= 'a' && ch <= 'z')
        return ch - 'a' + 26;
    if (ch >= '0' && ch <= '9')
        return ch - '0' + 52;
    if (ch == '+')
        return 62;
    if (ch == '/')
        return 63;
    return -1;
}

/*
 * Decode a NUL-terminated base64 string in place.
 *
 * Bytes that are not part of the base64 alphabet are silently skipped,
 * so line breaks and '=' padding need no special handling. The decoded
 * bytes overwrite the beginning of 'str' (the output is never longer
 * than the input consumed so far) and a terminating NUL is appended.
 *
 * Return value: number of decoded bytes (not counting the final NUL).
 */
int b64decode(unsigned char* str)
{
    unsigned char *in, *out = str;
    unsigned int acc = 0;   /* pending bits, newest sextet in the low bits */
    int nbits = 0;          /* how many bits of 'acc' are still unwritten */
    int val;

    for (in = str; *in != '\0'; ++in) {
        val = b64_value(*in);
        if (val < 0)
            continue;

        /* At most 6 bits are pending here, so 12 bits are enough. */
        acc = ((acc << 6) | (unsigned int)val) & 0xfffu;
        nbits += 6;

        if (nbits >= 8) {
            /* A whole octet is available: emit its top 8 pending bits. */
            nbits -= 8;
            *out++ = (unsigned char)((acc >> nbits) & 0xffu);
        }
    }

    *out = '\0';
    return (int)(out - str);
}

/* Modified from src/url.c --------------------------------------------------*/

/*
 * Given an hex octet (e.g., e3, 2F, 20), return the corresponding
 * character value (0..255) if the octet is valid, and -1 otherwise.
 *
 * 's' points at the two characters following a '%'. Both of them must be
 * hexadecimal digits; the string may end early (a NUL is simply not a
 * hex digit). The digits are parsed by hand on purpose: strtol() would
 * also accept leading whitespace and a sign, turning "%+A" or "% A"
 * into valid octets.
 */
static int Url_decode_hex_octet(const gchar *s)
{
   gint i, digit, hex_value = 0;

   if (!s)
      return -1;

   for (i = 0; i < 2; i++) {
      gchar c = s[i];

      if (c >= '0' && c <= '9')
         digit = c - '0';
      else if (c >= 'a' && c <= 'f')
         digit = c - 'a' + 10;
      else if (c >= 'A' && c <= 'F')
         digit = c - 'A' + 10;
      else
         return -1;   /* not a hex digit (this also stops at the NUL) */

      hex_value = hex_value * 16 + digit;
   }
   return hex_value;
}

/*
 * Parse possible hexadecimal octets in the URI path.
 * Returns a new allocated string.
 */
gchar *a_Url_decode_hex_str(const gchar *str, size_t *p_sz)
{
   gchar *new_str, *dest;
   int i, val;

   if (!str) {
      *p_sz = 0;
      return NULL;
   }

   dest = new_str = g_new(gchar, strlen(str) + 1);
   for (i = 0; str[i]; i++) {
      *dest++ = (str[i] == '%' && (val = Url_decode_hex_octet(str+i+1)) >= 0) ?
                i+=2, val : str[i];
   }
   *dest = 0;  

   new_str = g_realloc(new_str, sizeof(gchar) * (dest - new_str + 1));
   *p_sz = (size_t)(dest - new_str);
   return new_str;
}

/* end ----------------------------------------------------------------------*/

/*
 * Send decoded data to dillo in an HTTP envelope.
 *
 * Three things are written to the global socket handler, in this order:
 *   1. a dpip "start_send_page" tag carrying 'url',
 *   2. a minimal header: "Content-type: <mime_type>" and an empty line,
 *   3. the 'data_sz' raw bytes found at 'data'.
 *
 * NOTE(review): the last argument of the sock_handler_write* calls looks
 * like a flush flag -- confirm against dpiutil.h.
 */
void send_decoded_data(const char *url, const char *mime_type,
                       unsigned char *data, size_t data_sz)
{
   char *start_tag = a_Dpip_build_cmd("cmd=%s url=%s", "start_send_page", url);

   /* 1. dpip tag */
   sock_handler_write_str(sh, start_tag, 1);
   g_free(start_tag);

   /* 2. HTTP-like header */
   sock_handler_write_str(sh, "Content-type: ", 0);
   sock_handler_write_str(sh, mime_type, 0);
   sock_handler_write_str(sh, "\n\n", 1);

   /* 3. payload (may be binary, hence the explicit size) */
   sock_handler_write(sh, (char *)data, data_sz, 0);
}

/*
 * Send an HTML error page to dillo for a data URI that can't be parsed.
 *
 * The page is sent like a regular answer (dpip "start_send_page" tag for
 * 'url', then a "Content-type: text/html" header) and shows some debug
 * information: the mime type, the data size and the data itself.
 *
 * 'mime_type' and 'data' may be NULL -- this function is called exactly
 * when parsing failed -- in which case "(null)" is shown instead of
 * passing a NULL pointer to a "%s" conversion.
 *
 * NOTE(review): 'data' comes from the URI and is inserted into the HTML
 * without escaping; consider escaping it if that matters for the viewer.
 */
void send_failure_message(const char *url, const char *mime_type,
                          unsigned char *data, size_t data_sz)
{
   char *d_cmd;
   char buf[1024];

   const char *msg =
"<!DOCTYPE HTML PUBLIC '-//W3C//DTD HTML 4.01 Transitional//EN'>\n"
"<html><body>\n"
"<hr><h1>Datauri dpi</h1><hr>\n"
"<p><b>Can't parse datauri:</b><br>\n";
   const char *msg_mime_type = "text/html";
   const char *shown_mime = mime_type ? mime_type : "(null)";
   const char *shown_data = data ? (const char *)data : "(null)";

   /* Send dpip tag */
   d_cmd = a_Dpip_build_cmd("cmd=%s url=%s", "start_send_page", url);
   sock_handler_write_str(sh, d_cmd, 1);
   g_free(d_cmd);

   /* Send HTTP header. */
   sock_handler_write_str(sh, "Content-type: ", 0);
   sock_handler_write_str(sh, msg_mime_type, 0);
   sock_handler_write_str(sh, "\n\n", 1);

   /* Send message */
   sock_handler_write_str(sh, msg, 0);

   /* send some debug info (truncated to the size of 'buf') */
   g_snprintf(buf, sizeof(buf),
              "mime_type: %s<br>data size: %d<br>data: %s<br>",
              shown_mime, (int)data_sz, shown_data);
   sock_handler_write_str(sh, buf, 0);

   /* close page */
   sock_handler_write_str(sh, "</body></html>", 0);
}

/*
 * Get mime type from the data URI.
 *
 * The media type is whatever lies between "data:" and the first ',',
 * with a trailing ";base64" removed. Omitted parts get the RFC 2397
 * defaults:
 *   - nothing at all            -> "text/plain;charset=US-ASCII"
 *   - only parameters (";...")  -> "text/plain" + parameters
 *   - bare "charset=..."        -> "text/plain;" + it (lenient form)
 * When there's no ',' or the media type doesn't fit the local buffer,
 * the full default is returned too.
 *
 * Returns a new allocated string (to be released with g_free), or NULL
 * if 'url' doesn't start with "data:".
 *
 * todo: there's no point in handling "charset" because current dillo
 * only handles ISO-LATIN-1. The FLTK2 version (utf-8) could use it in the
 * future.
 */
char *datauri_get_mime(char *url)
{
   char buf[256];
   char *mime_type = NULL, *p;
   const char *spec;
   size_t len = 0;

   if (g_strncasecmp(url, "data:", 5) != 0)
      return NULL;

   if ((p = strchr(url, ',')) && (size_t)(p - url) < sizeof(buf)) {
      spec = url + 5;
      len = (size_t)(p - spec);
      memcpy(buf, spec, len);
      buf[len] = 0;
      /* strip ";base64" */
      if (len >= 7 && g_strcasecmp(buf + len - 7, ";base64") == 0) {
         len -= 7;
         buf[len] = 0;
      }
   }

   /* that's it, now handle omitted types ('buf' is only read if len > 0) */
   if (len == 0) {
      mime_type = g_strdup("text/plain;charset=US-ASCII");
   } else if (buf[0] == ';') {
      /* type/subtype omitted, parameters given: e.g. ";charset=utf-8" */
      mime_type = g_strconcat("text/plain", buf, NULL);
   } else if (!g_strncasecmp(buf, "charset", 7)) {
      /* parameter without its leading ';' -- add the separator */
      mime_type = g_strconcat("text/plain;", buf, NULL);
   } else {
      mime_type = g_strdup(buf);
   }

   return mime_type;
}

/*
 * Return a decoded data string.
 *
 * The payload is everything after the first ',' of 'url'. It is always
 * percent-decoded first; if the media type ends with ";base64", the
 * result is then base64-decoded in place. Doing the percent-decoding
 * first matters because '+', '/' and '=' may legitimately show up as
 * "%2B", "%2F" and "%3D" inside a URI.
 *
 * url : NUL-terminated data URI.
 * p_sz: receives the size in bytes of the decoded data.
 *
 * Returns a new allocated buffer (to be released with g_free). If 'url'
 * has no ',', an empty string is returned and *p_sz is set to 0.
 */
unsigned char *datauri_get_data(char *url, size_t *p_sz)
{
   char *p;
   int is_base64 = 0;
   unsigned char *data = NULL;

   p = strchr(url, ',');
   if (!p) {
      *p_sz = 0;
      return (unsigned char *)g_strdup("");
   }

   if (p - url >= 12 &&  /* "data:;base64" */
       g_strncasecmp(p - 7, ";base64", 7) == 0) {
      is_base64 = 1;
   }

   ++p;
   data = (unsigned char *)a_Url_decode_hex_str(p, p_sz);
   if (is_base64) {
      /* b64decode works on the NUL-terminated buffer and shrinks it. */
      *p_sz = (size_t) b64decode(data);
   }

   return data;
}

/*
 * Entry point of the filter dpi.
 *
 * Reads one dpip tag from stdin, takes its "cmd" and "url" attributes,
 * decodes the data URI and writes the answer to stdout: the decoded
 * data when the URI could be parsed, an HTML error page otherwise.
 *
 * Returns 0 on success; exits with EXIT_FAILURE when the tag can't be
 * read or lacks "cmd"/"url".
 */
int main(void)
{
   gchar *dpip_tag = NULL, *cmd = NULL, *url = NULL, *mime_type;
   unsigned char *data;
   size_t data_size = 0;

   /* Initialize the SockHandler */
   sh = sock_handler_new(STDIN_FILENO, STDOUT_FILENO, 8*1024);

   /* NOTE(review): the original comment here said "wget may need to
    * write a temporary file...", but nothing in this dpi runs wget; it
    * looks like a leftover -- confirm whether the chdir is still needed. */
   if (chdir("/tmp") != 0)
      MSG("Warning, can't chdir to /tmp\n");

   /* Read the dpi command from STDIN */
   dpip_tag = sock_handler_read(sh);
   if (!dpip_tag) {
      /* nothing to parse: strlen() below would dereference NULL */
      MSG("Error, can't read the dpip tag\n");
      exit (EXIT_FAILURE);
   }
   MSG("[%s]\n", dpip_tag);

   cmd = a_Dpip_get_attr(dpip_tag, strlen(dpip_tag), "cmd");
   url = a_Dpip_get_attr(dpip_tag, strlen(dpip_tag), "url");
   if (!cmd || !url) {
      MSG("Error, cmd=%s, url=%s\n",
          cmd ? cmd : "(null)", url ? url : "(null)");
      exit (EXIT_FAILURE);
   }

   /* Parse the data URI */
   mime_type = datauri_get_mime(url);
   data = datauri_get_data(url, &data_size);

   /* mime_type is NULL for a malformed URI; never hand NULL to "%s",
    * and don't print a size_t through "%d" without converting it. */
   MSG("mime_type: %s\n", mime_type ? mime_type : "(null)");
   MSG("data_size: %d\n", (int)data_size);
   MSG("data: {%s}\n", data ? (char *)data : "(null)");

   if (mime_type && data) {
      /* good URI */
      send_decoded_data(url, mime_type, data, data_size);
   } else {
      /* malformed URI */
      send_failure_message(url, mime_type, data, data_size);
   }

   g_free(data);
   g_free(mime_type);
   g_free(url);
   g_free(cmd);
   g_free(dpip_tag);

   /* Finish the SockHandler */
   sock_handler_close(sh);
   sock_handler_free(sh);

   return 0;
}