#include <stdlib.h>
#include <stdint.h>
#include <unistd.h>
#include <ctype.h>
#include <math.h>
#include "paf.h"
#include "sdict.h"
#include "kvec.h"
#include "eps.h"
typedef struct {
uint32_t qn, qs, qe;
uint32_t tn, ts, te;
uint32_t ml;
} dt_hit_t;
typedef struct {
const char *name;
double tot;
uint64_t w;
uint32_t i;
} srtaux_t;
static inline int mixed_numcompare(const char *_a, const char *_b)
{
const unsigned char *a = (const unsigned char*)_a, *b = (const unsigned char*)_b;
const unsigned char *pa = a, *pb = b;
while (*pa && *pb) {
if (isdigit(*pa) && isdigit(*pb)) {
while (*pa == '0') ++pa;
while (*pb == '0') ++pb;
while (isdigit(*pa) && isdigit(*pb) && *pa == *pb) ++pa, ++pb;
if (isdigit(*pa) && isdigit(*pb)) {
int i = 0;
while (isdigit(pa[i]) && isdigit(pb[i])) ++i;
return isdigit(pa[i])? 1 : isdigit(pb[i])? -1 : (int)*pa - (int)*pb;
} else if (isdigit(*pa)) return 1;
else if (isdigit(*pb)) return -1;
else if (pa - a != pb - b) return pa - a < pb - b? 1 : -1;
} else {
if (*pa != *pb) return (int)*pa - (int)*pb;
++pa; ++pb;
}
}
return *pa? 1 : *pb? -1 : 0;
}
#include "ksort.h"
#define srtx_lt(a, b) (mixed_numcompare((a).name, (b).name) < 0)
KSORT_INIT(dtx, srtaux_t, srtx_lt)
#define srty_lt(a, b) ((a).tot < (b).tot)
KSORT_INIT(dty, srtaux_t, srty_lt)
int main(int argc, char *argv[])
{
int min_span = 1000, min_match = 100, width = 600, height, diagonal = 1;
int color[2] = { 0xFF0000, 0x0080FF }, font_size = 11, no_label = 0;
float min_iden = .1;
paf_file_t *f;
sdict_t *d[2];
paf_rec_t r;
int32_t c, i, j;
uint64_t *acclen[2], totlen[2];
srtaux_t *a[2];
kvec_t(dt_hit_t) h = {0,0,0};
double sx, sy;
while ((c = getopt(argc, argv, "m:i:s:w:f:Ld")) >= 0) {
if (c == 'm') min_match = atoi(optarg);
else if (c == 'i') min_iden = atof(optarg);
else if (c == 's') min_span = atoi(optarg);
else if (c == 'w') width = atoi(optarg);
else if (c == 'f') font_size = atoi(optarg);
else if (c == 'L') no_label = 1;
else if (c == 'd') diagonal = 0;
}
if (argc == optind) {
fprintf(stderr, "Usage: minidot [options] <in.paf>\n");
fprintf(stderr, "Options:\n");
fprintf(stderr, " -m INT min match length [%d]\n", min_match);
fprintf(stderr, " -i FLOAT min identity [%.2f]\n", min_iden);
fprintf(stderr, " -s INT min span [%d]\n", min_span);
fprintf(stderr, " -w INT image width [%d]\n", width);
fprintf(stderr, " -f INT font size [%d]\n", font_size);
fprintf(stderr, " -L don't print labels\n");
fprintf(stderr, " -D don't try to put hits onto the diagonal\n");
return 1;
}
d[0] = sd_init();
d[1] = sd_init();
f = paf_open(argv[optind]);
if (!f) {
fprintf(stderr, "[E::%s] could not open PAF file %s\n", __func__, argv[optind]);
return 1;
}
while (paf_read(f, &r) >= 0) {
dt_hit_t *s;
if (r.qe - r.qs < min_span || r.te - r.ts < min_span || r.ml < min_match) continue;
if (r.ml < r.bl * min_iden) continue;
kv_pushp(dt_hit_t, h, &s);
s->qn = sd_put(d[1], r.qn, r.ql), s->qs = r.qs, s->qe = r.qe;
s->tn = sd_put(d[0], r.tn, r.tl);
s->ts = r.rev? r.te : r.ts, s->te = r.rev? r.ts : r.te;
s->ml = r.ml;
}
paf_close(f);
for (i = 0; i < 2; ++i) { // 0 for target; 1 for query
uint32_t n = d[i]->n_seq;
uint64_t l = 0;
a[i] = (srtaux_t*)calloc(n + 1, sizeof(srtaux_t));
if (i == 0 || !diagonal) {
for (j = 0; j < n; ++j)
a[i][j].name = d[i]->seq[j].name, a[i][j].i = j;
ks_introsort_dtx(n, a[i]);
} else {
srtaux_t *b = a[i];
for (j = 0; j < n; ++j)
b[j].name = d[i]->seq[j].name, b[j].tot = b[j].w = 0, b[j].i = j;
for (j = 0; j < h.n; ++j) {
uint64_t w, coor;
dt_hit_t *p = &h.a[j];
srtaux_t *q = &b[p->qn];
coor = acclen[0][p->tn] + (p->ts + p->te) / 2;
w = (uint64_t)(.01 * p->ml * p->ml + .499);
q->tot += (double)coor * w;
q->w += w;
}
for (j = 0; j < n; ++j) b[j].tot /= b[j].w;
ks_introsort_dty(n, b);
}
acclen[i] = (uint64_t*)calloc(n, 8);
for (j = 0; j < n; ++j)
acclen[i][a[i][j].i] = l, l += d[i]->seq[a[i][j].i].len;
totlen[i] = l;
}
height = (int)((double)width / totlen[0] * totlen[1] + .499);
sx = (double)width / totlen[0];
sy = (double)height / totlen[1];
eps_header(stdout, width, height, .2);
eps_font(stdout, "Helvetica-Narrow", font_size);
eps_gray(stdout, .8);
if (!no_label) {
// write x labels
for (i = 0; i < d[0]->n_seq; ++i)
eps_Mstr(stdout, (acclen[0][a[0][i].i] + .5 * d[0]->seq[a[0][i].i].len) * sx, font_size*.5, a[0][i].name);
eps_stroke(stdout);
fprintf(stdout, "gsave %g 0 translate 90 rotate\n", font_size*1.25);
// write y labels
for (i = 0; i < d[1]->n_seq; ++i)
eps_Mstr(stdout, (acclen[1][a[1][i].i] + .5 * d[1]->seq[a[1][i].i].len) * sx, 0, a[1][i].name);
fprintf(stdout, "grestore\n");
eps_stroke(stdout);
}
// write grid lines
eps_linewidth(stdout, .1);
for (i = 0; i < d[1]->n_seq; ++i)
eps_linex(stdout, 1, width, i == 0? 1 : acclen[1][a[1][i].i] * sy);
eps_linex(stdout, 1, width, totlen[1] * sy);
for (i = 0; i < d[0]->n_seq; ++i)
eps_liney(stdout, 1, height, i == 0? 1 : acclen[0][a[0][i].i] * sx);
eps_liney(stdout, 1, height, totlen[0] * sx);
eps_stroke(stdout);
// write hits
eps_linewidth(stdout, .1);
for (j = 0; j < 2; ++j) {
eps_color(stdout, color[j]);
for (i = 0; i < h.n; ++i) {
dt_hit_t *p = &h.a[i];
double x0, y0, x1, y1;
uint64_t xo = acclen[0][p->tn], yo = acclen[1][p->qn];
if (j == 0 && p->ts > p->te) continue;
if (j == 1 && p->ts < p->te) continue;
x0 = (p->ts + xo) * sx, y0 = (p->qs + yo) * sy;
x1 = (p->te + xo) * sx, y1 = (p->qe + yo) * sy;
eps_line(stdout, x0, y0, x1, y1);
}
eps_stroke(stdout);
}
eps_bottom(stdout);
for (i = 0; i < 2; ++i) {
free(acclen[i]);
free(a[i]);
sd_destroy(d[i]);
}
free(h.a);
return 0;
}