Imported Upstream version 2016-08-16
Alexandre Mestiashvili
7 years ago
0 | 2016-08-16 twu | |
1 | ||
2 | * VERSION: Updated version number | |
3 | ||
4 | * README: Discussing MAX_STACK_READLENGTH | |
5 | ||
6 | * gsnap.c, uniqscan.c: Using MAX_FLOORS_READLENGTH instead of MAX_READLENGTH | |
7 | ||
8 | * configure.ac: Using MAX_STACK_READLENGTH instead of MAX_READLENGTH | |
9 | ||
10 | * Makefile.gsnaptoo.am: Using MAX_STACK_READLENGTH instead of MAX_READLENGTH | |
11 | ||
12 | * stage1hr.h: Adding max_floor_readlength to setup | |
13 | ||
14 | * stage1hr.c: Removed local allocation of arrays of size MAX_READLENGTH. | |
15 | Now checking querylength against MAX_STACK_READLENGTH to determine whether | |
16 | to allocate from stack or heap. Adding max_floor_readlength to setup | |
17 | ||
18 | * indel.c, mapq.c, sarray-read.c, splice.c: Removed local allocation of | |
19 | arrays of size MAX_READLENGTH. Now checking querylength against | |
20 | MAX_STACK_READLENGTH to determine whether to allocate from stack or heap | |
21 | ||
22 | * stage3hr.c: Not allowing any indels to set trims in determining optimal | |
23 | score | |
24 | ||
25 | * stage1hr.c: Using pre-processor macro LONG_READLENGTHS to allocate | |
26 | read-related memory on heap instead of stack. Setting spliceable_high_p | |
27 | to be false for last segment. In computing end indels, ensuring that | |
28 | shifti is not negative when looking up array value. | |
29 | ||
30 | * shortread.c: Using MAX_EXPECTED_READLENGTH instead of MAX_READLENGTH | |
31 | ||
32 | * stage3.c: Handling the case when trimming ends that exon is empty | |
33 | ||
34 | * stage3hr.c: Restored setting of abort_pairing_p when nconcordant exceeds | |
35 | maxpairedpaths | |
36 | ||
37 | * gsnap.c, uniqscan.c: Using new interface to Pair_setup | |
38 | ||
39 | * indel.c, mapq.c, sarray-read.c, splice.c, substring.c: Using pre-processor | |
40 | macro LONG_READLENGTHS to allocate read-related memory on heap instead of | |
41 | stack | |
42 | ||
43 | * gmap.c, pair.c, pair.h: Added option --gff3-swap-phase | |
44 | ||
45 | * bytecoding.c: Added explanation messages to remove shared memory segments | |
46 | ||
47 | 2016-08-12 twu | |
48 | ||
49 | * Makefile.gsnaptoo.am, config.site.rescomp.prd, configure.ac, filestring.c, | |
50 | genome_sites.c, gsnap.c, pair.c, samprint.c, sarray-read.c, sedgesort.c, | |
51 | sedgesort.h, shortread.c, splice.c, src, stage1hr.c, stage3hr.c, | |
52 | stage3hr.h, substring.c, substring.h, trunk, univdiag.c, univdiag.h, util: | |
53 | Merged revisions 195608 to 196272 from | |
54 | branches/2016-08-09-genome-sites-hr, which contains merged revisions from | |
55 | branches/2016-08-02-long-read-fusions and 2016-07-01-better-triage | |
56 | ||
57 | * VERSION, trunk: Updated version number | |
58 | ||
59 | * Makefile.gsnaptoo.am: Removed chrsubset.c and chrsubset.h for | |
60 | splicing-score | |
61 | ||
62 | * pair.c: Added variable to swap phase for gff3 output | |
63 | ||
64 | * configure.ac: Added a line to disable maintainer mode for users | |
65 | ||
66 | * config.site.rescomp.prd, config.site.rescomp.tst: Updated for latest | |
67 | version | |
68 | ||
69 | * MAINTAINER: Added note about PATH | |
70 | ||
71 | * archive.html, index.html: Updated for latest version | |
72 | ||
0 | 73 | 2016-08-08 twu |
1 | 74 | |
2 | * atoi.c, cmet.c: Fixed reduce procedures for 64-bit oligos | |
3 | ||
4 | * stage1hr.c: Fixed values of splice_pos_start and splice_pos_end given to | |
5 | Genome_donor_positions and related functions | |
6 | ||
7 | * filestring.c: Handling the case where stringlen is negative | |
8 | ||
9 | * stage3.c: Merged revision 195962 from trunk to fix an issue where we tried | |
10 | to use pairs_pretrim after path_trim altered the pairs | |
11 | ||
12 | * samprint.c, substring.c, substring.h: Merged revision 195960 from trunk to | |
13 | fix XT field to have correct fusion coordinates | |
14 | ||
15 | 2016-08-04 twu | |
16 | ||
17 | * 2016-08-02-long-read-fusions, comp.h, config.site.rescomp.prd, pair.c, | |
18 | pairpool.c, sarray-read.c, src, stage3.c, util: Merged revisions 195492 | |
19 | through 195762 from branches/2016-07-01-better-triage to get latest fixes | |
20 | ||
21 | * 2016-08-02-long-read-fusions, Makefile.gsnaptoo.am, comp.h, | |
22 | config.site.rescomp.prd, configure.ac, filestring.c, gsnap.c, pair.c, | |
23 | pairpool.c, samprint.c, sarray-read.c, sedgesort.c, sedgesort.h, | |
24 | shortread.c, src, stage1hr.c, stage3.c, stage3hr.c, stage3hr.h, | |
25 | substring.c, substring.h, univdiag.c, univdiag.h: Merged revisions 193240 | |
26 | to 195491 from branches/2016-07-01-better-triage for better performance | |
27 | ||
28 | 2016-08-03 twu | |
29 | ||
30 | * stage1hr.c: Hard-coded some values for plusp | |
31 | ||
32 | * splice.c: Using new interface to Substring_new_donor and | |
33 | Substring_new_acceptor | |
34 | ||
35 | * stage1hr.c: In computing spliceable segments, using a variable for holding | |
36 | previous spliceable information, to resolve writing to an uninitialized | |
37 | ptr at end. Using a streamlined version of splicing for distant RNA. | |
38 | ||
39 | * substring.c, substring.h: Added parameters substring_querystart and | |
40 | substring_queryend to Substring_new_donor and Substring_new_acceptor, so | |
41 | we can handle splicing segments in the middle of the read | |
42 | ||
43 | * genome_sites.c: Added debugging statements | |
44 | ||
45 | * stage1hr.c: Allowing fusions to occur between middle segments that are | |
46 | spliceable on their distal ends | |
75 | * gtf_genes.pl.in, gtf_introns.pl.in, gtf_splicesites.pl.in: Printing both | |
76 | gene_id and gene_name | |
77 | ||
78 | * atoi.c, cmet.c: Fixed reduce procedures for 64-bit computers | |
79 | ||
80 | * Makefile.gsnaptoo.am: Added semaphore.c and semaphore.h to list of files | |
81 | for splicing-score | |
82 | ||
83 | * stage1hr.c: Fixed debugging statements | |
84 | ||
85 | * stage3.c: Fixed issue where we tried to use pairs_pretrim after path_trim | |
86 | altered the pairs | |
87 | ||
88 | * samprint.c, substring.c, substring.h: Fixed XT field to print correct | |
89 | junction coordinates | |
47 | 90 | |
48 | 91 | 2016-08-02 twu |
49 | ||
50 | * 2016-08-02-long-read-fusions: Created branch to find fusions in long reads | |
51 | 92 | |
52 | 93 | * stage3hr.c: Restoring final procedure based on nmatches in |
53 | 94 | Stage3pair_optimal_score |
183 | 183 | $(top_srcdir)/config/config.sub \ |
184 | 184 | $(top_srcdir)/config/install-sh $(top_srcdir)/config/ltmain.sh \ |
185 | 185 | $(top_srcdir)/config/missing AUTHORS COPYING ChangeLog INSTALL \ |
186 | NEWS README config/compile config/config.guess \ | |
186 | NEWS README TODO config/compile config/config.guess \ | |
187 | 187 | config/config.sub config/install-sh config/ltmain.sh \ |
188 | 188 | config/missing |
189 | 189 | DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) |
272 | 272 | LN_S = @LN_S@ |
273 | 273 | LTLIBOBJS = @LTLIBOBJS@ |
274 | 274 | LT_SYS_LIBRARY_PATH = @LT_SYS_LIBRARY_PATH@ |
275 | MAINT = @MAINT@ | |
275 | 276 | MAKEINFO = @MAKEINFO@ |
276 | 277 | MANIFEST_TOOL = @MANIFEST_TOOL@ |
277 | MAX_READLENGTH = @MAX_READLENGTH@ | |
278 | MAX_STACK_READLENGTH = @MAX_STACK_READLENGTH@ | |
278 | 279 | MKDIR_P = @MKDIR_P@ |
279 | 280 | MPICC = @MPICC@ |
280 | 281 | MPILIBS = @MPILIBS@ |
378 | 379 | .SUFFIXES: |
379 | 380 | am--refresh: Makefile |
380 | 381 | @: |
381 | $(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(am__configure_deps) | |
382 | $(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(am__configure_deps) | |
382 | 383 | @for dep in $?; do \ |
383 | 384 | case '$(am__configure_deps)' in \ |
384 | 385 | *$$dep*) \ |
404 | 405 | $(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) |
405 | 406 | $(SHELL) ./config.status --recheck |
406 | 407 | |
407 | $(top_srcdir)/configure: $(am__configure_deps) | |
408 | $(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps) | |
408 | 409 | $(am__cd) $(srcdir) && $(AUTOCONF) |
409 | $(ACLOCAL_M4): $(am__aclocal_m4_deps) | |
410 | $(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps) | |
410 | 411 | $(am__cd) $(srcdir) && $(ACLOCAL) $(ACLOCAL_AMFLAGS) |
411 | 412 | $(am__aclocal_m4_deps): |
412 | 413 |
50 | 50 | ./configure CONFIG_SITE=<config site file> |
51 | 51 | |
52 | 52 | |
53 | Note 3: GSNAP is designed for short reads of a limited length, and | |
54 | uses a configure variable called MAX_READLENGTH (default 300) as a | |
55 | guide to the maximum read length. You may set this variable by | |
56 | providing it to configure like this | |
57 | ||
58 | ./configure MAX_READLENGTH=<length> | |
53 | Note 3: GSNAP previously had a configure variable called | |
54 | MAX_READLENGTH (default 300) as a guide to the maximum read length. | |
55 | That variable is no longer needed, since GSNAP can align reads of | |
56 | arbitrary length. (But, for longer reads, GMAP will probably be much | |
57 | faster.) | |
58 | ||
59 | However, whenever possible, based on the length of the read, GSNAP | |
60 | will use stack memory instead of heap memory for some algorithms. To | |
61 | control this decision, there is a variable called | |
62 | MAX_STACK_READLENGTH, set like this | |
63 | ||
64 | ./configure MAX_STACK_READLENGTH=<length> | |
59 | 65 | |
60 | 66 | or by defining it in your config.site file (or in the file provided to |
61 | 67 | configure as the value of CONFIG_SITE). Or you may set the value of |
62 | MAX_READLENGTH as an environment variable before calling ./configure. | |
63 | If you do not set MAX_READLENGTH, it will have the default value shown | |
64 | when you run "./configure --help". | |
65 | ||
66 | Note that MAX_READLENGTH applies only to GSNAP. GMAP, on the other | |
67 | hand, can process queries up to 1 million bp. | |
68 | ||
69 | Also, starting with version 2014-08-20, if your C compiler can | |
70 | handle stack-based memory allocation using the alloca() function, | |
71 | GSNAP ignores MAX_READLENGTH, and can handle reads longer than that | |
72 | value. | |
68 | MAX_STACK_READLENGTH as an environment variable before calling | |
69 | ./configure. If you set MAX_STACK_READLENGTH too high, you may | |
70 | overflow the amount of stack allocated by your computer. If you do | |
71 | not set MAX_STACK_READLENGTH, it will have a default value of 300. | |
73 | 72 | |
74 | 73 | |
75 | 74 | Note 4: GSNAP can read from gzip-compressed FASTA or FASTQ input |
644 | 644 | rmdir .tst 2>/dev/null |
645 | 645 | AC_SUBST([am__leading_dot])]) |
646 | 646 | |
647 | # Add --enable-maintainer-mode option to configure. -*- Autoconf -*- | |
648 | # From Jim Meyering | |
649 | ||
650 | # Copyright (C) 1996-2014 Free Software Foundation, Inc. | |
651 | # | |
652 | # This file is free software; the Free Software Foundation | |
653 | # gives unlimited permission to copy and/or distribute it, | |
654 | # with or without modifications, as long as this notice is preserved. | |
655 | ||
656 | # AM_MAINTAINER_MODE([DEFAULT-MODE]) | |
657 | # ---------------------------------- | |
658 | # Control maintainer-specific portions of Makefiles. | |
659 | # Default is to disable them, unless 'enable' is passed literally. | |
660 | # For symmetry, 'disable' may be passed as well. Anyway, the user | |
661 | # can override the default with the --enable/--disable switch. | |
662 | AC_DEFUN([AM_MAINTAINER_MODE], | |
663 | [m4_case(m4_default([$1], [disable]), | |
664 | [enable], [m4_define([am_maintainer_other], [disable])], | |
665 | [disable], [m4_define([am_maintainer_other], [enable])], | |
666 | [m4_define([am_maintainer_other], [enable]) | |
667 | m4_warn([syntax], [unexpected argument to AM@&t@_MAINTAINER_MODE: $1])]) | |
668 | AC_MSG_CHECKING([whether to enable maintainer-specific portions of Makefiles]) | |
669 | dnl maintainer-mode's default is 'disable' unless 'enable' is passed | |
670 | AC_ARG_ENABLE([maintainer-mode], | |
671 | [AS_HELP_STRING([--]am_maintainer_other[-maintainer-mode], | |
672 | am_maintainer_other[ make rules and dependencies not useful | |
673 | (and sometimes confusing) to the casual installer])], | |
674 | [USE_MAINTAINER_MODE=$enableval], | |
675 | [USE_MAINTAINER_MODE=]m4_if(am_maintainer_other, [enable], [no], [yes])) | |
676 | AC_MSG_RESULT([$USE_MAINTAINER_MODE]) | |
677 | AM_CONDITIONAL([MAINTAINER_MODE], [test $USE_MAINTAINER_MODE = yes]) | |
678 | MAINT=$MAINTAINER_MODE_TRUE | |
679 | AC_SUBST([MAINT])dnl | |
680 | ] | |
681 | ) | |
682 | ||
647 | 683 | # Check to see how 'make' treats includes. -*- Autoconf -*- |
648 | 684 | |
649 | 685 | # Copyright (C) 2001-2014 Free Software Foundation, Inc. |
0 | 0 | #! /bin/sh |
1 | 1 | # Guess values for system-dependent variables and create Makefiles. |
2 | # Generated by GNU Autoconf 2.69 for gmap 2016-08-08. | |
2 | # Generated by GNU Autoconf 2.69 for gmap 2016-08-16. | |
3 | 3 | # |
4 | 4 | # Report bugs to <Thomas Wu <twu@gene.com>>. |
5 | 5 | # |
589 | 589 | # Identity of this package. |
590 | 590 | PACKAGE_NAME='gmap' |
591 | 591 | PACKAGE_TARNAME='gmap' |
592 | PACKAGE_VERSION='2016-08-08' | |
593 | PACKAGE_STRING='gmap 2016-08-08' | |
592 | PACKAGE_VERSION='2016-08-16' | |
593 | PACKAGE_STRING='gmap 2016-08-16' | |
594 | 594 | PACKAGE_BUGREPORT='Thomas Wu <twu@gene.com>' |
595 | 595 | PACKAGE_URL='' |
596 | 596 | |
637 | 637 | LIBOBJS |
638 | 638 | BZLIB_LIBS |
639 | 639 | ZLIB_LIBS |
640 | MAX_READLENGTH | |
640 | MAX_STACK_READLENGTH | |
641 | 641 | GMAPDB |
642 | 642 | MAKE_SSE2_FALSE |
643 | 643 | MAKE_SSE2_TRUE |
693 | 693 | MAINTAINER_TRUE |
694 | 694 | FULLDIST_FALSE |
695 | 695 | FULLDIST_TRUE |
696 | MAINT | |
697 | MAINTAINER_MODE_FALSE | |
698 | MAINTAINER_MODE_TRUE | |
696 | 699 | AM_BACKSLASH |
697 | 700 | AM_DEFAULT_VERBOSITY |
698 | 701 | AM_DEFAULT_V |
794 | 797 | enable_largefile |
795 | 798 | enable_dependency_tracking |
796 | 799 | enable_silent_rules |
800 | enable_maintainer_mode | |
797 | 801 | enable_fulldist |
798 | 802 | enable_maintainer |
799 | 803 | enable_shared |
826 | 830 | MPICC |
827 | 831 | LT_SYS_LIBRARY_PATH |
828 | 832 | CPP |
829 | MAX_READLENGTH' | |
833 | MAX_STACK_READLENGTH' | |
830 | 834 | |
831 | 835 | |
832 | 836 | # Initialize some variables set by options. |
1367 | 1371 | # Omit some internal or obsolete options to make the list less imposing. |
1368 | 1372 | # This message is too long to be a string in the A/UX 3.1 sh. |
1369 | 1373 | cat <<_ACEOF |
1370 | \`configure' configures gmap 2016-08-08 to adapt to many kinds of systems. | |
1374 | \`configure' configures gmap 2016-08-16 to adapt to many kinds of systems. | |
1371 | 1375 | |
1372 | 1376 | Usage: $0 [OPTION]... [VAR=VALUE]... |
1373 | 1377 | |
1438 | 1442 | |
1439 | 1443 | if test -n "$ac_init_help"; then |
1440 | 1444 | case $ac_init_help in |
1441 | short | recursive ) echo "Configuration of gmap 2016-08-08:";; | |
1445 | short | recursive ) echo "Configuration of gmap 2016-08-16:";; | |
1442 | 1446 | esac |
1443 | 1447 | cat <<\_ACEOF |
1444 | 1448 | |
1453 | 1457 | speeds up one-time build |
1454 | 1458 | --enable-silent-rules less verbose build output (undo: "make V=1") |
1455 | 1459 | --disable-silent-rules verbose build output (undo: "make V=0") |
1460 | --enable-maintainer-mode | |
1461 | enable make rules and dependencies not useful (and | |
1462 | sometimes confusing) to the casual installer | |
1456 | 1463 | --enable-fulldist For use by program maintainer |
1457 | 1464 | --enable-maintainer For use by program maintainer |
1458 | 1465 | --enable-shared[=PKGS] build shared libraries [default=yes] |
1504 | 1511 | LT_SYS_LIBRARY_PATH |
1505 | 1512 | User-defined run-time library search path. |
1506 | 1513 | CPP C preprocessor |
1507 | MAX_READLENGTH | |
1508 | Maximum read length for GSNAP (default 300) | |
1514 | MAX_STACK_READLENGTH | |
1515 | Maximum read length for GSNAP allocating on stack rather than | |
1516 | heap (default 300) | |
1509 | 1517 | |
1510 | 1518 | Use these variables to override the choices made by `configure' or to help |
1511 | 1519 | it to find libraries and programs with nonstandard names/locations. |
1573 | 1581 | test -n "$ac_init_help" && exit $ac_status |
1574 | 1582 | if $ac_init_version; then |
1575 | 1583 | cat <<\_ACEOF |
1576 | gmap configure 2016-08-08 | |
1584 | gmap configure 2016-08-16 | |
1577 | 1585 | generated by GNU Autoconf 2.69 |
1578 | 1586 | |
1579 | 1587 | Copyright (C) 2012 Free Software Foundation, Inc. |
2179 | 2187 | This file contains any messages produced by compilers while |
2180 | 2188 | running configure, to aid debugging if configure makes a mistake. |
2181 | 2189 | |
2182 | It was created by gmap $as_me 2016-08-08, which was | |
2190 | It was created by gmap $as_me 2016-08-16, which was | |
2183 | 2191 | generated by GNU Autoconf 2.69. Invocation command line was |
2184 | 2192 | |
2185 | 2193 | $ $0 $@ |
2529 | 2537 | |
2530 | 2538 | { $as_echo "$as_me:${as_lineno-$LINENO}: checking package version" >&5 |
2531 | 2539 | $as_echo_n "checking package version... " >&6; } |
2532 | { $as_echo "$as_me:${as_lineno-$LINENO}: result: 2016-08-08" >&5 | |
2533 | $as_echo "2016-08-08" >&6; } | |
2540 | { $as_echo "$as_me:${as_lineno-$LINENO}: result: 2016-08-16" >&5 | |
2541 | $as_echo "2016-08-16" >&6; } | |
2534 | 2542 | |
2535 | 2543 | |
2536 | 2544 | ### Read defaults |
4395 | 4403 | |
4396 | 4404 | # Define the identity of the package. |
4397 | 4405 | PACKAGE='gmap' |
4398 | VERSION='2016-08-08' | |
4406 | VERSION='2016-08-16' | |
4399 | 4407 | |
4400 | 4408 | |
4401 | 4409 | cat >>confdefs.h <<_ACEOF |
4615 | 4623 | as_fn_error $? "Your 'rm' program is bad, sorry." "$LINENO" 5 |
4616 | 4624 | fi |
4617 | 4625 | fi |
4626 | ||
4627 | ||
4628 | { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether to enable maintainer-specific portions of Makefiles" >&5 | |
4629 | $as_echo_n "checking whether to enable maintainer-specific portions of Makefiles... " >&6; } | |
4630 | # Check whether --enable-maintainer-mode was given. | |
4631 | if test "${enable_maintainer_mode+set}" = set; then : | |
4632 | enableval=$enable_maintainer_mode; USE_MAINTAINER_MODE=$enableval | |
4633 | else | |
4634 | USE_MAINTAINER_MODE=no | |
4635 | fi | |
4636 | ||
4637 | { $as_echo "$as_me:${as_lineno-$LINENO}: result: $USE_MAINTAINER_MODE" >&5 | |
4638 | $as_echo "$USE_MAINTAINER_MODE" >&6; } | |
4639 | if test $USE_MAINTAINER_MODE = yes; then | |
4640 | MAINTAINER_MODE_TRUE= | |
4641 | MAINTAINER_MODE_FALSE='#' | |
4642 | else | |
4643 | MAINTAINER_MODE_TRUE='#' | |
4644 | MAINTAINER_MODE_FALSE= | |
4645 | fi | |
4646 | ||
4647 | MAINT=$MAINTAINER_MODE_TRUE | |
4648 | ||
4649 | ||
4618 | 4650 | |
4619 | 4651 | |
4620 | 4652 | if test "x$enable_fulldist" = xyes; then |
15237 | 15269 | |
15238 | 15270 | #AC_FUNC_MMAP # Checks only private fixed mapping of already-mapped memory |
15239 | 15271 | |
15272 | ||
15240 | 15273 | { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether alloca is enabled" >&5 |
15241 | 15274 | $as_echo_n "checking whether alloca is enabled... " >&6; } |
15242 | 15275 | # Check whether --enable-alloca was given. |
18917 | 18950 | $as_echo "$GMAPDB" >&6; } |
18918 | 18951 | |
18919 | 18952 | |
18920 | # MAX_READLENGTH | |
18921 | { $as_echo "$as_me:${as_lineno-$LINENO}: checking MAX_READLENGTH" >&5 | |
18922 | $as_echo_n "checking MAX_READLENGTH... " >&6; } | |
18923 | ||
18924 | if test x"$MAX_READLENGTH" = x; then | |
18925 | ||
18926 | EXP_VAR=MAX_READLENGTH | |
18953 | # MAX_STACK_READLENGTH | |
18954 | { $as_echo "$as_me:${as_lineno-$LINENO}: checking MAX_STACK_READLENGTH" >&5 | |
18955 | $as_echo_n "checking MAX_STACK_READLENGTH... " >&6; } | |
18956 | ||
18957 | if test x"$MAX_STACK_READLENGTH" = x; then | |
18958 | ||
18959 | EXP_VAR=MAX_STACK_READLENGTH | |
18927 | 18960 | FROM_VAR='300' |
18928 | 18961 | |
18929 | 18962 | prefix_save=$prefix |
18944 | 18977 | done |
18945 | 18978 | |
18946 | 18979 | full_var=$new_full_var |
18947 | MAX_READLENGTH="$full_var" | |
18980 | MAX_STACK_READLENGTH="$full_var" | |
18948 | 18981 | |
18949 | 18982 | |
18950 | 18983 | prefix=$prefix_save |
18951 | 18984 | exec_prefix=$exec_prefix_save |
18952 | 18985 | |
18953 | 18986 | fi |
18954 | { $as_echo "$as_me:${as_lineno-$LINENO}: result: $MAX_READLENGTH" >&5 | |
18955 | $as_echo "$MAX_READLENGTH" >&6; } | |
18987 | { $as_echo "$as_me:${as_lineno-$LINENO}: result: $MAX_STACK_READLENGTH" >&5 | |
18988 | $as_echo "$MAX_STACK_READLENGTH" >&6; } | |
18956 | 18989 | |
18957 | 18990 | |
18958 | 18991 | # zlib package |
19641 | 19674 | am__EXEEXT_FALSE= |
19642 | 19675 | fi |
19643 | 19676 | |
19677 | if test -z "${MAINTAINER_MODE_TRUE}" && test -z "${MAINTAINER_MODE_FALSE}"; then | |
19678 | as_fn_error $? "conditional \"MAINTAINER_MODE\" was never defined. | |
19679 | Usually this means the macro was only invoked conditionally." "$LINENO" 5 | |
19680 | fi | |
19644 | 19681 | if test -z "${FULLDIST_TRUE}" && test -z "${FULLDIST_FALSE}"; then |
19645 | 19682 | as_fn_error $? "conditional \"FULLDIST\" was never defined. |
19646 | 19683 | Usually this means the macro was only invoked conditionally." "$LINENO" 5 |
20071 | 20108 | # report actual input values of CONFIG_FILES etc. instead of their |
20072 | 20109 | # values after options handling. |
20073 | 20110 | ac_log=" |
20074 | This file was extended by gmap $as_me 2016-08-08, which was | |
20111 | This file was extended by gmap $as_me 2016-08-16, which was | |
20075 | 20112 | generated by GNU Autoconf 2.69. Invocation command line was |
20076 | 20113 | |
20077 | 20114 | CONFIG_FILES = $CONFIG_FILES |
20137 | 20174 | cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 |
20138 | 20175 | ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`" |
20139 | 20176 | ac_cs_version="\\ |
20140 | gmap config.status 2016-08-08 | |
20177 | gmap config.status 2016-08-16 | |
20141 | 20178 | configured by $0, generated by GNU Autoconf 2.69, |
20142 | 20179 | with options \\"\$ac_cs_config\\" |
20143 | 20180 |
110 | 110 | #AM_INIT_AUTOMAKE([no-dependencies]) |
111 | 111 | #AM_INIT_AUTOMAKE(AC_PACKAGE_NAME, AC_PACKAGE_VERSION) |
112 | 112 | AM_INIT_AUTOMAKE |
113 | AM_MAINTAINER_MODE([disable]) | |
114 | ||
113 | 115 | |
114 | 116 | AM_CONDITIONAL(FULLDIST,test "x$enable_fulldist" = xyes) |
115 | 117 | AC_ARG_ENABLE([fulldist], |
260 | 262 | |
261 | 263 | #AC_FUNC_MMAP # Checks only private fixed mapping of already-mapped memory |
262 | 264 | |
265 | ||
263 | 266 | AC_MSG_CHECKING(whether alloca is enabled) |
264 | 267 | AC_ARG_ENABLE([alloca], |
265 | 268 | AC_HELP_STRING([--enable-alloca], |
592 | 595 | AC_MSG_RESULT($GMAPDB) |
593 | 596 | |
594 | 597 | |
595 | # MAX_READLENGTH | |
596 | AC_MSG_CHECKING(MAX_READLENGTH) | |
597 | AC_ARG_VAR([MAX_READLENGTH], [Maximum read length for GSNAP (default 300)]) | |
598 | if test x"$MAX_READLENGTH" = x; then | |
599 | ACX_EXPAND(MAX_READLENGTH,'300') | |
600 | fi | |
601 | AC_MSG_RESULT($MAX_READLENGTH) | |
598 | # MAX_STACK_READLENGTH | |
599 | AC_MSG_CHECKING(MAX_STACK_READLENGTH) | |
600 | AC_ARG_VAR([MAX_STACK_READLENGTH], [Maximum read length for GSNAP allocating on stack rather than heap (default 300)]) | |
601 | if test x"$MAX_STACK_READLENGTH" = x; then | |
602 | ACX_EXPAND(MAX_STACK_READLENGTH,'300') | |
603 | fi | |
604 | AC_MSG_RESULT($MAX_STACK_READLENGTH) | |
602 | 605 | |
603 | 606 | |
604 | 607 | # zlib package |
282 | 282 | # Previously included -lrt for shm_open, but we are not calling that |
283 | 283 | |
284 | 284 | gsnap_nosimd_CC = $(PTHREAD_CC) |
285 | gsnap_nosimd_CFLAGS = $(AM_CFLAGS) $(PTHREAD_CFLAGS) -DTARGET=\"$(target)\" -DGMAPDB=\"$(GMAPDB)\" -DMAX_READLENGTH=$(MAX_READLENGTH) -DGSNAP=1 $(POPCNT_CFLAGS) | |
285 | gsnap_nosimd_CFLAGS = $(AM_CFLAGS) $(PTHREAD_CFLAGS) -DTARGET=\"$(target)\" -DGMAPDB=\"$(GMAPDB)\" -DMAX_STACK_READLENGTH=$(MAX_STACK_READLENGTH) -DGSNAP=1 $(POPCNT_CFLAGS) | |
286 | 286 | gsnap_nosimd_LDFLAGS = $(AM_LDFLAGS) $(STATIC_LDFLAG) |
287 | 287 | gsnap_nosimd_LDADD = $(PTHREAD_LIBS) $(ZLIB_LIBS) $(BZLIB_LIBS) |
288 | 288 | dist_gsnap_nosimd_SOURCES = $(GSNAP_FILES) |
289 | 289 | |
290 | 290 | gsnap_sse2_CC = $(PTHREAD_CC) |
291 | gsnap_sse2_CFLAGS = $(AM_CFLAGS) $(PTHREAD_CFLAGS) -DTARGET=\"$(target)\" -DGMAPDB=\"$(GMAPDB)\" -DMAX_READLENGTH=$(MAX_READLENGTH) -DGSNAP=1 $(POPCNT_CFLAGS) -DHAVE_SSE2=1 $(SIMD_SSE2_CFLAGS) | |
291 | gsnap_sse2_CFLAGS = $(AM_CFLAGS) $(PTHREAD_CFLAGS) -DTARGET=\"$(target)\" -DGMAPDB=\"$(GMAPDB)\" -DMAX_STACK_READLENGTH=$(MAX_STACK_READLENGTH) -DGSNAP=1 $(POPCNT_CFLAGS) -DHAVE_SSE2=1 $(SIMD_SSE2_CFLAGS) | |
292 | 292 | gsnap_sse2_LDFLAGS = $(AM_LDFLAGS) $(STATIC_LDFLAG) |
293 | 293 | gsnap_sse2_LDADD = $(PTHREAD_LIBS) $(ZLIB_LIBS) $(BZLIB_LIBS) |
294 | 294 | dist_gsnap_sse2_SOURCES = $(GSNAP_FILES) |
295 | 295 | |
296 | 296 | gsnap_ssse3_CC = $(PTHREAD_CC) |
297 | gsnap_ssse3_CFLAGS = $(AM_CFLAGS) $(PTHREAD_CFLAGS) -DTARGET=\"$(target)\" -DGMAPDB=\"$(GMAPDB)\" -DMAX_READLENGTH=$(MAX_READLENGTH) -DGSNAP=1 $(POPCNT_CFLAGS) -DHAVE_SSE2=1 -DHAVE_SSSE3=1 $(SIMD_SSSE3_CFLAGS) | |
297 | gsnap_ssse3_CFLAGS = $(AM_CFLAGS) $(PTHREAD_CFLAGS) -DTARGET=\"$(target)\" -DGMAPDB=\"$(GMAPDB)\" -DMAX_STACK_READLENGTH=$(MAX_STACK_READLENGTH) -DGSNAP=1 $(POPCNT_CFLAGS) -DHAVE_SSE2=1 -DHAVE_SSSE3=1 $(SIMD_SSSE3_CFLAGS) | |
298 | 298 | gsnap_ssse3_LDFLAGS = $(AM_LDFLAGS) $(STATIC_LDFLAG) |
299 | 299 | gsnap_ssse3_LDADD = $(PTHREAD_LIBS) $(ZLIB_LIBS) $(BZLIB_LIBS) |
300 | 300 | dist_gsnap_ssse3_SOURCES = $(GSNAP_FILES) |
301 | 301 | |
302 | 302 | gsnap_sse41_CC = $(PTHREAD_CC) |
303 | gsnap_sse41_CFLAGS = $(AM_CFLAGS) $(PTHREAD_CFLAGS) -DTARGET=\"$(target)\" -DGMAPDB=\"$(GMAPDB)\" -DMAX_READLENGTH=$(MAX_READLENGTH) -DGSNAP=1 $(POPCNT_CFLAGS) -DHAVE_SSE2=1 -DHAVE_SSSE3=1 -DHAVE_SSE4_1=1 $(SIMD_SSE4_1_CFLAGS) | |
303 | gsnap_sse41_CFLAGS = $(AM_CFLAGS) $(PTHREAD_CFLAGS) -DTARGET=\"$(target)\" -DGMAPDB=\"$(GMAPDB)\" -DMAX_STACK_READLENGTH=$(MAX_STACK_READLENGTH) -DGSNAP=1 $(POPCNT_CFLAGS) -DHAVE_SSE2=1 -DHAVE_SSSE3=1 -DHAVE_SSE4_1=1 $(SIMD_SSE4_1_CFLAGS) | |
304 | 304 | gsnap_sse41_LDFLAGS = $(AM_LDFLAGS) $(STATIC_LDFLAG) |
305 | 305 | gsnap_sse41_LDADD = $(PTHREAD_LIBS) $(ZLIB_LIBS) $(BZLIB_LIBS) |
306 | 306 | dist_gsnap_sse41_SOURCES = $(GSNAP_FILES) |
307 | 307 | |
308 | 308 | gsnap_sse42_CC = $(PTHREAD_CC) |
309 | gsnap_sse42_CFLAGS = $(AM_CFLAGS) $(PTHREAD_CFLAGS) -DTARGET=\"$(target)\" -DGMAPDB=\"$(GMAPDB)\" -DMAX_READLENGTH=$(MAX_READLENGTH) -DGSNAP=1 $(POPCNT_CFLAGS) -DHAVE_SSE2=1 -DHAVE_SSSE3=1 -DHAVE_SSE4_1=1 -DHAVE_SSE4_2=1 $(SIMD_SSE4_2_CFLAGS) | |
309 | gsnap_sse42_CFLAGS = $(AM_CFLAGS) $(PTHREAD_CFLAGS) -DTARGET=\"$(target)\" -DGMAPDB=\"$(GMAPDB)\" -DMAX_STACK_READLENGTH=$(MAX_STACK_READLENGTH) -DGSNAP=1 $(POPCNT_CFLAGS) -DHAVE_SSE2=1 -DHAVE_SSSE3=1 -DHAVE_SSE4_1=1 -DHAVE_SSE4_2=1 $(SIMD_SSE4_2_CFLAGS) | |
310 | 310 | gsnap_sse42_LDFLAGS = $(AM_LDFLAGS) $(STATIC_LDFLAG) |
311 | 311 | gsnap_sse42_LDADD = $(PTHREAD_LIBS) $(ZLIB_LIBS) $(BZLIB_LIBS) |
312 | 312 | dist_gsnap_sse42_SOURCES = $(GSNAP_FILES) |
313 | 313 | |
314 | 314 | gsnap_avx2_CC = $(PTHREAD_CC) |
315 | gsnap_avx2_CFLAGS = $(AM_CFLAGS) $(PTHREAD_CFLAGS) -DTARGET=\"$(target)\" -DGMAPDB=\"$(GMAPDB)\" -DMAX_READLENGTH=$(MAX_READLENGTH) -DGSNAP=1 $(POPCNT_CFLAGS) -DHAVE_SSE2=1 -DHAVE_SSSE3=1 -DHAVE_SSE4_1=1 -DHAVE_SSE4_2=1 -DHAVE_AVX2=1 $(SIMD_AVX2_CFLAGS) | |
315 | gsnap_avx2_CFLAGS = $(AM_CFLAGS) $(PTHREAD_CFLAGS) -DTARGET=\"$(target)\" -DGMAPDB=\"$(GMAPDB)\" -DMAX_STACK_READLENGTH=$(MAX_STACK_READLENGTH) -DGSNAP=1 $(POPCNT_CFLAGS) -DHAVE_SSE2=1 -DHAVE_SSSE3=1 -DHAVE_SSE4_1=1 -DHAVE_SSE4_2=1 -DHAVE_AVX2=1 $(SIMD_AVX2_CFLAGS) | |
316 | 316 | gsnap_avx2_LDFLAGS = $(AM_LDFLAGS) $(STATIC_LDFLAG) |
317 | 317 | gsnap_avx2_LDADD = $(PTHREAD_LIBS) $(ZLIB_LIBS) $(BZLIB_LIBS) |
318 | 318 | dist_gsnap_avx2_SOURCES = $(GSNAP_FILES) |
361 | 361 | # Note: dist_ commands get read by bootstrap, and don't follow the flags |
362 | 362 | |
363 | 363 | gsnapl_nosimd_CC = $(PTHREAD_CC) |
364 | gsnapl_nosimd_CFLAGS = $(AM_CFLAGS) $(PTHREAD_CFLAGS) -DTARGET=\"$(target)\" -DGMAPDB=\"$(GMAPDB)\" -DMAX_READLENGTH=$(MAX_READLENGTH) -DGSNAP=1 -DLARGE_GENOMES=1 $(POPCNT_CFLAGS) | |
364 | gsnapl_nosimd_CFLAGS = $(AM_CFLAGS) $(PTHREAD_CFLAGS) -DTARGET=\"$(target)\" -DGMAPDB=\"$(GMAPDB)\" -DMAX_STACK_READLENGTH=$(MAX_STACK_READLENGTH) -DGSNAP=1 -DLARGE_GENOMES=1 $(POPCNT_CFLAGS) | |
365 | 365 | gsnapl_nosimd_LDFLAGS = $(AM_LDFLAGS) $(STATIC_LDFLAG) |
366 | 366 | gsnapl_nosimd_LDADD = $(PTHREAD_LIBS) $(ZLIB_LIBS) $(BZLIB_LIBS) |
367 | 367 | dist_gsnapl_nosimd_SOURCES = $(GSNAPL_FILES) |
368 | 368 | |
369 | 369 | gsnapl_sse2_CC = $(PTHREAD_CC) |
370 | gsnapl_sse2_CFLAGS = $(AM_CFLAGS) $(PTHREAD_CFLAGS) -DTARGET=\"$(target)\" -DGMAPDB=\"$(GMAPDB)\" -DMAX_READLENGTH=$(MAX_READLENGTH) -DGSNAP=1 -DLARGE_GENOMES=1 $(POPCNT_CFLAGS) -DHAVE_SSE2=1 $(SIMD_SSE2_CFLAGS) | |
370 | gsnapl_sse2_CFLAGS = $(AM_CFLAGS) $(PTHREAD_CFLAGS) -DTARGET=\"$(target)\" -DGMAPDB=\"$(GMAPDB)\" -DMAX_STACK_READLENGTH=$(MAX_STACK_READLENGTH) -DGSNAP=1 -DLARGE_GENOMES=1 $(POPCNT_CFLAGS) -DHAVE_SSE2=1 $(SIMD_SSE2_CFLAGS) | |
371 | 371 | gsnapl_sse2_LDFLAGS = $(AM_LDFLAGS) $(STATIC_LDFLAG) |
372 | 372 | gsnapl_sse2_LDADD = $(PTHREAD_LIBS) $(ZLIB_LIBS) $(BZLIB_LIBS) |
373 | 373 | dist_gsnapl_sse2_SOURCES = $(GSNAPL_FILES) |
374 | 374 | |
375 | 375 | gsnapl_ssse3_CC = $(PTHREAD_CC) |
376 | gsnapl_ssse3_CFLAGS = $(AM_CFLAGS) $(PTHREAD_CFLAGS) -DTARGET=\"$(target)\" -DGMAPDB=\"$(GMAPDB)\" -DMAX_READLENGTH=$(MAX_READLENGTH) -DGSNAP=1 -DLARGE_GENOMES=1 $(POPCNT_CFLAGS) -DHAVE_SSE2=1 -DHAVE_SSSE3=1 $(SIMD_SSSE3_CFLAGS) | |
376 | gsnapl_ssse3_CFLAGS = $(AM_CFLAGS) $(PTHREAD_CFLAGS) -DTARGET=\"$(target)\" -DGMAPDB=\"$(GMAPDB)\" -DMAX_STACK_READLENGTH=$(MAX_STACK_READLENGTH) -DGSNAP=1 -DLARGE_GENOMES=1 $(POPCNT_CFLAGS) -DHAVE_SSE2=1 -DHAVE_SSSE3=1 $(SIMD_SSSE3_CFLAGS) | |
377 | 377 | gsnapl_ssse3_LDFLAGS = $(AM_LDFLAGS) $(STATIC_LDFLAG) |
378 | 378 | gsnapl_ssse3_LDADD = $(PTHREAD_LIBS) $(ZLIB_LIBS) $(BZLIB_LIBS) |
379 | 379 | dist_gsnapl_ssse3_SOURCES = $(GSNAPL_FILES) |
380 | 380 | |
381 | 381 | gsnapl_sse41_CC = $(PTHREAD_CC) |
382 | gsnapl_sse41_CFLAGS = $(AM_CFLAGS) $(PTHREAD_CFLAGS) -DTARGET=\"$(target)\" -DGMAPDB=\"$(GMAPDB)\" -DMAX_READLENGTH=$(MAX_READLENGTH) -DGSNAP=1 -DLARGE_GENOMES=1 $(POPCNT_CFLAGS) -DHAVE_SSE2=1 -DHAVE_SSSE3=1 -DHAVE_SSE4_1=1 $(SIMD_SSE4_1_CFLAGS) | |
382 | gsnapl_sse41_CFLAGS = $(AM_CFLAGS) $(PTHREAD_CFLAGS) -DTARGET=\"$(target)\" -DGMAPDB=\"$(GMAPDB)\" -DMAX_STACK_READLENGTH=$(MAX_STACK_READLENGTH) -DGSNAP=1 -DLARGE_GENOMES=1 $(POPCNT_CFLAGS) -DHAVE_SSE2=1 -DHAVE_SSSE3=1 -DHAVE_SSE4_1=1 $(SIMD_SSE4_1_CFLAGS) | |
383 | 383 | gsnapl_sse41_LDFLAGS = $(AM_LDFLAGS) $(STATIC_LDFLAG) |
384 | 384 | gsnapl_sse41_LDADD = $(PTHREAD_LIBS) $(ZLIB_LIBS) $(BZLIB_LIBS) |
385 | 385 | dist_gsnapl_sse41_SOURCES = $(GSNAPL_FILES) |
386 | 386 | |
387 | 387 | gsnapl_sse42_CC = $(PTHREAD_CC) |
388 | gsnapl_sse42_CFLAGS = $(AM_CFLAGS) $(PTHREAD_CFLAGS) -DTARGET=\"$(target)\" -DGMAPDB=\"$(GMAPDB)\" -DMAX_READLENGTH=$(MAX_READLENGTH) -DGSNAP=1 -DLARGE_GENOMES=1 $(POPCNT_CFLAGS) -DHAVE_SSE2=1 -DHAVE_SSSE3=1 -DHAVE_SSE4_1=1 -DHAVE_SSE4_2=1 $(SIMD_SSE4_2_CFLAGS) | |
388 | gsnapl_sse42_CFLAGS = $(AM_CFLAGS) $(PTHREAD_CFLAGS) -DTARGET=\"$(target)\" -DGMAPDB=\"$(GMAPDB)\" -DMAX_STACK_READLENGTH=$(MAX_STACK_READLENGTH) -DGSNAP=1 -DLARGE_GENOMES=1 $(POPCNT_CFLAGS) -DHAVE_SSE2=1 -DHAVE_SSSE3=1 -DHAVE_SSE4_1=1 -DHAVE_SSE4_2=1 $(SIMD_SSE4_2_CFLAGS) | |
389 | 389 | gsnapl_sse42_LDFLAGS = $(AM_LDFLAGS) $(STATIC_LDFLAG) |
390 | 390 | gsnapl_sse42_LDADD = $(PTHREAD_LIBS) $(ZLIB_LIBS) $(BZLIB_LIBS) |
391 | 391 | dist_gsnapl_sse42_SOURCES = $(GSNAPL_FILES) |
392 | 392 | |
393 | 393 | gsnapl_avx2_CC = $(PTHREAD_CC) |
394 | gsnapl_avx2_CFLAGS = $(AM_CFLAGS) $(PTHREAD_CFLAGS) -DTARGET=\"$(target)\" -DGMAPDB=\"$(GMAPDB)\" -DMAX_READLENGTH=$(MAX_READLENGTH) -DGSNAP=1 -DLARGE_GENOMES=1 $(POPCNT_CFLAGS) -DHAVE_SSE2=1 -DHAVE_SSSE3=1 -DHAVE_SSE4_1=1 -DHAVE_SSE4_2=1 -DHAVE_AVX2=1 $(SIMD_AVX2_CFLAGS) | |
394 | gsnapl_avx2_CFLAGS = $(AM_CFLAGS) $(PTHREAD_CFLAGS) -DTARGET=\"$(target)\" -DGMAPDB=\"$(GMAPDB)\" -DMAX_STACK_READLENGTH=$(MAX_STACK_READLENGTH) -DGSNAP=1 -DLARGE_GENOMES=1 $(POPCNT_CFLAGS) -DHAVE_SSE2=1 -DHAVE_SSSE3=1 -DHAVE_SSE4_1=1 -DHAVE_SSE4_2=1 -DHAVE_AVX2=1 $(SIMD_AVX2_CFLAGS) | |
395 | 395 | gsnapl_avx2_LDFLAGS = $(AM_LDFLAGS) $(STATIC_LDFLAG) |
396 | 396 | gsnapl_avx2_LDADD = $(PTHREAD_LIBS) $(ZLIB_LIBS) $(BZLIB_LIBS) |
397 | 397 | dist_gsnapl_avx2_SOURCES = $(GSNAPL_FILES) |
435 | 435 | getopt.c getopt1.c getopt.h uniqscan.c |
436 | 436 | |
437 | 437 | uniqscan_CC = $(PTHREAD_CC) |
438 | uniqscan_CFLAGS = $(AM_CFLAGS) $(PTHREAD_CFLAGS) -DTARGET=\"$(target)\" -DGMAPDB=\"$(GMAPDB)\" -DMAX_READLENGTH=$(MAX_READLENGTH) -DGSNAP=1 $(POPCNT_CFLAGS) | |
438 | uniqscan_CFLAGS = $(AM_CFLAGS) $(PTHREAD_CFLAGS) -DTARGET=\"$(target)\" -DGMAPDB=\"$(GMAPDB)\" -DMAX_STACK_READLENGTH=$(MAX_STACK_READLENGTH) -DGSNAP=1 $(POPCNT_CFLAGS) | |
439 | 439 | uniqscan_LDFLAGS = $(AM_LDFLAGS) $(STATIC_LDFLAG) |
440 | 440 | uniqscan_LDADD = $(PTHREAD_LIBS) $(ZLIB_LIBS) $(BZLIB_LIBS) |
441 | 441 | |
477 | 477 | getopt.c getopt1.c getopt.h uniqscan.c |
478 | 478 | |
479 | 479 | uniqscanl_CC = $(PTHREAD_CC) |
480 | uniqscanl_CFLAGS = $(AM_CFLAGS) $(PTHREAD_CFLAGS) -DTARGET=\"$(target)\" -DGMAPDB=\"$(GMAPDB)\" -DMAX_READLENGTH=$(MAX_READLENGTH) -DGSNAP=1 -DLARGE_GENOMES=1 $(POPCNT_CFLAGS) | |
480 | uniqscanl_CFLAGS = $(AM_CFLAGS) $(PTHREAD_CFLAGS) -DTARGET=\"$(target)\" -DGMAPDB=\"$(GMAPDB)\" -DMAX_STACK_READLENGTH=$(MAX_STACK_READLENGTH) -DGSNAP=1 -DLARGE_GENOMES=1 $(POPCNT_CFLAGS) | |
481 | 481 | uniqscanl_LDFLAGS = $(AM_LDFLAGS) $(STATIC_LDFLAG) |
482 | 482 | uniqscanl_LDADD = $(PTHREAD_LIBS) $(ZLIB_LIBS) $(BZLIB_LIBS) |
483 | 483 | |
701 | 701 | # intlistdef.h intlist.c intlist.h listdef.h list.c list.h \ |
702 | 702 | # univinterval.c univinterval.h interval.c interval.h \ |
703 | 703 | # uintlist.c uintlist.h \ |
704 | # chrom.c chrom.h stopwatch.c stopwatch.h access.c access.h \ | |
704 | # chrom.c chrom.h stopwatch.c stopwatch.h semaphore.c semaphore.h access.c access.h \ | |
705 | 705 | # iit-read-univ.c iit-read-univ.h iitdef.h iit-read.c iit-read.h \ |
706 | 706 | # filestring.c filestring.h \ |
707 | 707 | # md5.c md5.h complement.h bzip2.c bzip2.h sequence.c sequence.h \ |
708 | 708 | # genome.c genome.h \ |
709 | 709 | # genomicpos.c genomicpos.h \ |
710 | # chrnum.c chrnum.h chrsubset.c chrsubset.h \ | |
710 | # chrnum.c chrnum.h \ | |
711 | 711 | # maxent.c maxent.h \ |
712 | 712 | # branchpoint.c branchpoint.h \ |
713 | 713 | # parserange.c parserange.h datadir.c datadir.h getopt.c getopt1.c getopt.h splicing-score.c |
2189 | 2189 | ETAGS = etags |
2190 | 2190 | CTAGS = ctags |
2191 | 2191 | am__DIST_COMMON = $(srcdir)/Makefile.in $(srcdir)/config.h.in \ |
2192 | $(top_srcdir)/config/depcomp | |
2192 | $(top_srcdir)/config/depcomp ChangeLog compile | |
2193 | 2193 | DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) |
2194 | 2194 | ACLOCAL = @ACLOCAL@ |
2195 | 2195 | ALLOCA = @ALLOCA@ |
2235 | 2235 | LN_S = @LN_S@ |
2236 | 2236 | LTLIBOBJS = @LTLIBOBJS@ |
2237 | 2237 | LT_SYS_LIBRARY_PATH = @LT_SYS_LIBRARY_PATH@ |
2238 | MAINT = @MAINT@ | |
2238 | 2239 | MAKEINFO = @MAKEINFO@ |
2239 | 2240 | MANIFEST_TOOL = @MANIFEST_TOOL@ |
2240 | MAX_READLENGTH = @MAX_READLENGTH@ | |
2241 | MAX_STACK_READLENGTH = @MAX_STACK_READLENGTH@ | |
2241 | 2242 | MKDIR_P = @MKDIR_P@ |
2242 | 2243 | MPICC = @MPICC@ |
2243 | 2244 | MPILIBS = @MPILIBS@ |
2547 | 2548 | # Note: dist_ commands get read by bootstrap, and don't follow the flags |
2548 | 2549 | # Previously included -lrt for shm_open, but we are not calling that |
2549 | 2550 | gsnap_nosimd_CC = $(PTHREAD_CC) |
2550 | gsnap_nosimd_CFLAGS = $(AM_CFLAGS) $(PTHREAD_CFLAGS) -DTARGET=\"$(target)\" -DGMAPDB=\"$(GMAPDB)\" -DMAX_READLENGTH=$(MAX_READLENGTH) -DGSNAP=1 $(POPCNT_CFLAGS) | |
2551 | gsnap_nosimd_CFLAGS = $(AM_CFLAGS) $(PTHREAD_CFLAGS) -DTARGET=\"$(target)\" -DGMAPDB=\"$(GMAPDB)\" -DMAX_STACK_READLENGTH=$(MAX_STACK_READLENGTH) -DGSNAP=1 $(POPCNT_CFLAGS) | |
2551 | 2552 | gsnap_nosimd_LDFLAGS = $(AM_LDFLAGS) $(STATIC_LDFLAG) |
2552 | 2553 | gsnap_nosimd_LDADD = $(PTHREAD_LIBS) $(ZLIB_LIBS) $(BZLIB_LIBS) |
2553 | 2554 | dist_gsnap_nosimd_SOURCES = $(GSNAP_FILES) |
2554 | 2555 | gsnap_sse2_CC = $(PTHREAD_CC) |
2555 | gsnap_sse2_CFLAGS = $(AM_CFLAGS) $(PTHREAD_CFLAGS) -DTARGET=\"$(target)\" -DGMAPDB=\"$(GMAPDB)\" -DMAX_READLENGTH=$(MAX_READLENGTH) -DGSNAP=1 $(POPCNT_CFLAGS) -DHAVE_SSE2=1 $(SIMD_SSE2_CFLAGS) | |
2556 | gsnap_sse2_CFLAGS = $(AM_CFLAGS) $(PTHREAD_CFLAGS) -DTARGET=\"$(target)\" -DGMAPDB=\"$(GMAPDB)\" -DMAX_STACK_READLENGTH=$(MAX_STACK_READLENGTH) -DGSNAP=1 $(POPCNT_CFLAGS) -DHAVE_SSE2=1 $(SIMD_SSE2_CFLAGS) | |
2556 | 2557 | gsnap_sse2_LDFLAGS = $(AM_LDFLAGS) $(STATIC_LDFLAG) |
2557 | 2558 | gsnap_sse2_LDADD = $(PTHREAD_LIBS) $(ZLIB_LIBS) $(BZLIB_LIBS) |
2558 | 2559 | dist_gsnap_sse2_SOURCES = $(GSNAP_FILES) |
2559 | 2560 | gsnap_ssse3_CC = $(PTHREAD_CC) |
2560 | gsnap_ssse3_CFLAGS = $(AM_CFLAGS) $(PTHREAD_CFLAGS) -DTARGET=\"$(target)\" -DGMAPDB=\"$(GMAPDB)\" -DMAX_READLENGTH=$(MAX_READLENGTH) -DGSNAP=1 $(POPCNT_CFLAGS) -DHAVE_SSE2=1 -DHAVE_SSSE3=1 $(SIMD_SSSE3_CFLAGS) | |
2561 | gsnap_ssse3_CFLAGS = $(AM_CFLAGS) $(PTHREAD_CFLAGS) -DTARGET=\"$(target)\" -DGMAPDB=\"$(GMAPDB)\" -DMAX_STACK_READLENGTH=$(MAX_STACK_READLENGTH) -DGSNAP=1 $(POPCNT_CFLAGS) -DHAVE_SSE2=1 -DHAVE_SSSE3=1 $(SIMD_SSSE3_CFLAGS) | |
2561 | 2562 | gsnap_ssse3_LDFLAGS = $(AM_LDFLAGS) $(STATIC_LDFLAG) |
2562 | 2563 | gsnap_ssse3_LDADD = $(PTHREAD_LIBS) $(ZLIB_LIBS) $(BZLIB_LIBS) |
2563 | 2564 | dist_gsnap_ssse3_SOURCES = $(GSNAP_FILES) |
2564 | 2565 | gsnap_sse41_CC = $(PTHREAD_CC) |
2565 | gsnap_sse41_CFLAGS = $(AM_CFLAGS) $(PTHREAD_CFLAGS) -DTARGET=\"$(target)\" -DGMAPDB=\"$(GMAPDB)\" -DMAX_READLENGTH=$(MAX_READLENGTH) -DGSNAP=1 $(POPCNT_CFLAGS) -DHAVE_SSE2=1 -DHAVE_SSSE3=1 -DHAVE_SSE4_1=1 $(SIMD_SSE4_1_CFLAGS) | |
2566 | gsnap_sse41_CFLAGS = $(AM_CFLAGS) $(PTHREAD_CFLAGS) -DTARGET=\"$(target)\" -DGMAPDB=\"$(GMAPDB)\" -DMAX_STACK_READLENGTH=$(MAX_STACK_READLENGTH) -DGSNAP=1 $(POPCNT_CFLAGS) -DHAVE_SSE2=1 -DHAVE_SSSE3=1 -DHAVE_SSE4_1=1 $(SIMD_SSE4_1_CFLAGS) | |
2566 | 2567 | gsnap_sse41_LDFLAGS = $(AM_LDFLAGS) $(STATIC_LDFLAG) |
2567 | 2568 | gsnap_sse41_LDADD = $(PTHREAD_LIBS) $(ZLIB_LIBS) $(BZLIB_LIBS) |
2568 | 2569 | dist_gsnap_sse41_SOURCES = $(GSNAP_FILES) |
2569 | 2570 | gsnap_sse42_CC = $(PTHREAD_CC) |
2570 | gsnap_sse42_CFLAGS = $(AM_CFLAGS) $(PTHREAD_CFLAGS) -DTARGET=\"$(target)\" -DGMAPDB=\"$(GMAPDB)\" -DMAX_READLENGTH=$(MAX_READLENGTH) -DGSNAP=1 $(POPCNT_CFLAGS) -DHAVE_SSE2=1 -DHAVE_SSSE3=1 -DHAVE_SSE4_1=1 -DHAVE_SSE4_2=1 $(SIMD_SSE4_2_CFLAGS) | |
2571 | gsnap_sse42_CFLAGS = $(AM_CFLAGS) $(PTHREAD_CFLAGS) -DTARGET=\"$(target)\" -DGMAPDB=\"$(GMAPDB)\" -DMAX_STACK_READLENGTH=$(MAX_STACK_READLENGTH) -DGSNAP=1 $(POPCNT_CFLAGS) -DHAVE_SSE2=1 -DHAVE_SSSE3=1 -DHAVE_SSE4_1=1 -DHAVE_SSE4_2=1 $(SIMD_SSE4_2_CFLAGS) | |
2571 | 2572 | gsnap_sse42_LDFLAGS = $(AM_LDFLAGS) $(STATIC_LDFLAG) |
2572 | 2573 | gsnap_sse42_LDADD = $(PTHREAD_LIBS) $(ZLIB_LIBS) $(BZLIB_LIBS) |
2573 | 2574 | dist_gsnap_sse42_SOURCES = $(GSNAP_FILES) |
2574 | 2575 | gsnap_avx2_CC = $(PTHREAD_CC) |
2575 | gsnap_avx2_CFLAGS = $(AM_CFLAGS) $(PTHREAD_CFLAGS) -DTARGET=\"$(target)\" -DGMAPDB=\"$(GMAPDB)\" -DMAX_READLENGTH=$(MAX_READLENGTH) -DGSNAP=1 $(POPCNT_CFLAGS) -DHAVE_SSE2=1 -DHAVE_SSSE3=1 -DHAVE_SSE4_1=1 -DHAVE_SSE4_2=1 -DHAVE_AVX2=1 $(SIMD_AVX2_CFLAGS) | |
2576 | gsnap_avx2_CFLAGS = $(AM_CFLAGS) $(PTHREAD_CFLAGS) -DTARGET=\"$(target)\" -DGMAPDB=\"$(GMAPDB)\" -DMAX_STACK_READLENGTH=$(MAX_STACK_READLENGTH) -DGSNAP=1 $(POPCNT_CFLAGS) -DHAVE_SSE2=1 -DHAVE_SSSE3=1 -DHAVE_SSE4_1=1 -DHAVE_SSE4_2=1 -DHAVE_AVX2=1 $(SIMD_AVX2_CFLAGS) | |
2576 | 2577 | gsnap_avx2_LDFLAGS = $(AM_LDFLAGS) $(STATIC_LDFLAG) |
2577 | 2578 | gsnap_avx2_LDADD = $(PTHREAD_LIBS) $(ZLIB_LIBS) $(BZLIB_LIBS) |
2578 | 2579 | dist_gsnap_avx2_SOURCES = $(GSNAP_FILES) |
2616 | 2617 | |
2617 | 2618 | # Note: dist_ commands get read by bootstrap, and don't follow the flags |
2618 | 2619 | gsnapl_nosimd_CC = $(PTHREAD_CC) |
2619 | gsnapl_nosimd_CFLAGS = $(AM_CFLAGS) $(PTHREAD_CFLAGS) -DTARGET=\"$(target)\" -DGMAPDB=\"$(GMAPDB)\" -DMAX_READLENGTH=$(MAX_READLENGTH) -DGSNAP=1 -DLARGE_GENOMES=1 $(POPCNT_CFLAGS) | |
2620 | gsnapl_nosimd_CFLAGS = $(AM_CFLAGS) $(PTHREAD_CFLAGS) -DTARGET=\"$(target)\" -DGMAPDB=\"$(GMAPDB)\" -DMAX_STACK_READLENGTH=$(MAX_STACK_READLENGTH) -DGSNAP=1 -DLARGE_GENOMES=1 $(POPCNT_CFLAGS) | |
2620 | 2621 | gsnapl_nosimd_LDFLAGS = $(AM_LDFLAGS) $(STATIC_LDFLAG) |
2621 | 2622 | gsnapl_nosimd_LDADD = $(PTHREAD_LIBS) $(ZLIB_LIBS) $(BZLIB_LIBS) |
2622 | 2623 | dist_gsnapl_nosimd_SOURCES = $(GSNAPL_FILES) |
2623 | 2624 | gsnapl_sse2_CC = $(PTHREAD_CC) |
2624 | gsnapl_sse2_CFLAGS = $(AM_CFLAGS) $(PTHREAD_CFLAGS) -DTARGET=\"$(target)\" -DGMAPDB=\"$(GMAPDB)\" -DMAX_READLENGTH=$(MAX_READLENGTH) -DGSNAP=1 -DLARGE_GENOMES=1 $(POPCNT_CFLAGS) -DHAVE_SSE2=1 $(SIMD_SSE2_CFLAGS) | |
2625 | gsnapl_sse2_CFLAGS = $(AM_CFLAGS) $(PTHREAD_CFLAGS) -DTARGET=\"$(target)\" -DGMAPDB=\"$(GMAPDB)\" -DMAX_STACK_READLENGTH=$(MAX_STACK_READLENGTH) -DGSNAP=1 -DLARGE_GENOMES=1 $(POPCNT_CFLAGS) -DHAVE_SSE2=1 $(SIMD_SSE2_CFLAGS) | |
2625 | 2626 | gsnapl_sse2_LDFLAGS = $(AM_LDFLAGS) $(STATIC_LDFLAG) |
2626 | 2627 | gsnapl_sse2_LDADD = $(PTHREAD_LIBS) $(ZLIB_LIBS) $(BZLIB_LIBS) |
2627 | 2628 | dist_gsnapl_sse2_SOURCES = $(GSNAPL_FILES) |
2628 | 2629 | gsnapl_ssse3_CC = $(PTHREAD_CC) |
2629 | gsnapl_ssse3_CFLAGS = $(AM_CFLAGS) $(PTHREAD_CFLAGS) -DTARGET=\"$(target)\" -DGMAPDB=\"$(GMAPDB)\" -DMAX_READLENGTH=$(MAX_READLENGTH) -DGSNAP=1 -DLARGE_GENOMES=1 $(POPCNT_CFLAGS) -DHAVE_SSE2=1 -DHAVE_SSSE3=1 $(SIMD_SSSE3_CFLAGS) | |
2630 | gsnapl_ssse3_CFLAGS = $(AM_CFLAGS) $(PTHREAD_CFLAGS) -DTARGET=\"$(target)\" -DGMAPDB=\"$(GMAPDB)\" -DMAX_STACK_READLENGTH=$(MAX_STACK_READLENGTH) -DGSNAP=1 -DLARGE_GENOMES=1 $(POPCNT_CFLAGS) -DHAVE_SSE2=1 -DHAVE_SSSE3=1 $(SIMD_SSSE3_CFLAGS) | |
2630 | 2631 | gsnapl_ssse3_LDFLAGS = $(AM_LDFLAGS) $(STATIC_LDFLAG) |
2631 | 2632 | gsnapl_ssse3_LDADD = $(PTHREAD_LIBS) $(ZLIB_LIBS) $(BZLIB_LIBS) |
2632 | 2633 | dist_gsnapl_ssse3_SOURCES = $(GSNAPL_FILES) |
2633 | 2634 | gsnapl_sse41_CC = $(PTHREAD_CC) |
2634 | gsnapl_sse41_CFLAGS = $(AM_CFLAGS) $(PTHREAD_CFLAGS) -DTARGET=\"$(target)\" -DGMAPDB=\"$(GMAPDB)\" -DMAX_READLENGTH=$(MAX_READLENGTH) -DGSNAP=1 -DLARGE_GENOMES=1 $(POPCNT_CFLAGS) -DHAVE_SSE2=1 -DHAVE_SSSE3=1 -DHAVE_SSE4_1=1 $(SIMD_SSE4_1_CFLAGS) | |
2635 | gsnapl_sse41_CFLAGS = $(AM_CFLAGS) $(PTHREAD_CFLAGS) -DTARGET=\"$(target)\" -DGMAPDB=\"$(GMAPDB)\" -DMAX_STACK_READLENGTH=$(MAX_STACK_READLENGTH) -DGSNAP=1 -DLARGE_GENOMES=1 $(POPCNT_CFLAGS) -DHAVE_SSE2=1 -DHAVE_SSSE3=1 -DHAVE_SSE4_1=1 $(SIMD_SSE4_1_CFLAGS) | |
2635 | 2636 | gsnapl_sse41_LDFLAGS = $(AM_LDFLAGS) $(STATIC_LDFLAG) |
2636 | 2637 | gsnapl_sse41_LDADD = $(PTHREAD_LIBS) $(ZLIB_LIBS) $(BZLIB_LIBS) |
2637 | 2638 | dist_gsnapl_sse41_SOURCES = $(GSNAPL_FILES) |
2638 | 2639 | gsnapl_sse42_CC = $(PTHREAD_CC) |
2639 | gsnapl_sse42_CFLAGS = $(AM_CFLAGS) $(PTHREAD_CFLAGS) -DTARGET=\"$(target)\" -DGMAPDB=\"$(GMAPDB)\" -DMAX_READLENGTH=$(MAX_READLENGTH) -DGSNAP=1 -DLARGE_GENOMES=1 $(POPCNT_CFLAGS) -DHAVE_SSE2=1 -DHAVE_SSSE3=1 -DHAVE_SSE4_1=1 -DHAVE_SSE4_2=1 $(SIMD_SSE4_2_CFLAGS) | |
2640 | gsnapl_sse42_CFLAGS = $(AM_CFLAGS) $(PTHREAD_CFLAGS) -DTARGET=\"$(target)\" -DGMAPDB=\"$(GMAPDB)\" -DMAX_STACK_READLENGTH=$(MAX_STACK_READLENGTH) -DGSNAP=1 -DLARGE_GENOMES=1 $(POPCNT_CFLAGS) -DHAVE_SSE2=1 -DHAVE_SSSE3=1 -DHAVE_SSE4_1=1 -DHAVE_SSE4_2=1 $(SIMD_SSE4_2_CFLAGS) | |
2640 | 2641 | gsnapl_sse42_LDFLAGS = $(AM_LDFLAGS) $(STATIC_LDFLAG) |
2641 | 2642 | gsnapl_sse42_LDADD = $(PTHREAD_LIBS) $(ZLIB_LIBS) $(BZLIB_LIBS) |
2642 | 2643 | dist_gsnapl_sse42_SOURCES = $(GSNAPL_FILES) |
2643 | 2644 | gsnapl_avx2_CC = $(PTHREAD_CC) |
2644 | gsnapl_avx2_CFLAGS = $(AM_CFLAGS) $(PTHREAD_CFLAGS) -DTARGET=\"$(target)\" -DGMAPDB=\"$(GMAPDB)\" -DMAX_READLENGTH=$(MAX_READLENGTH) -DGSNAP=1 -DLARGE_GENOMES=1 $(POPCNT_CFLAGS) -DHAVE_SSE2=1 -DHAVE_SSSE3=1 -DHAVE_SSE4_1=1 -DHAVE_SSE4_2=1 -DHAVE_AVX2=1 $(SIMD_AVX2_CFLAGS) | |
2645 | gsnapl_avx2_CFLAGS = $(AM_CFLAGS) $(PTHREAD_CFLAGS) -DTARGET=\"$(target)\" -DGMAPDB=\"$(GMAPDB)\" -DMAX_STACK_READLENGTH=$(MAX_STACK_READLENGTH) -DGSNAP=1 -DLARGE_GENOMES=1 $(POPCNT_CFLAGS) -DHAVE_SSE2=1 -DHAVE_SSSE3=1 -DHAVE_SSE4_1=1 -DHAVE_SSE4_2=1 -DHAVE_AVX2=1 $(SIMD_AVX2_CFLAGS) | |
2645 | 2646 | gsnapl_avx2_LDFLAGS = $(AM_LDFLAGS) $(STATIC_LDFLAG) |
2646 | 2647 | gsnapl_avx2_LDADD = $(PTHREAD_LIBS) $(ZLIB_LIBS) $(BZLIB_LIBS) |
2647 | 2648 | dist_gsnapl_avx2_SOURCES = $(GSNAPL_FILES) |
2684 | 2685 | getopt.c getopt1.c getopt.h uniqscan.c |
2685 | 2686 | |
2686 | 2687 | uniqscan_CC = $(PTHREAD_CC) |
2687 | uniqscan_CFLAGS = $(AM_CFLAGS) $(PTHREAD_CFLAGS) -DTARGET=\"$(target)\" -DGMAPDB=\"$(GMAPDB)\" -DMAX_READLENGTH=$(MAX_READLENGTH) -DGSNAP=1 $(POPCNT_CFLAGS) | |
2688 | uniqscan_CFLAGS = $(AM_CFLAGS) $(PTHREAD_CFLAGS) -DTARGET=\"$(target)\" -DGMAPDB=\"$(GMAPDB)\" -DMAX_STACK_READLENGTH=$(MAX_STACK_READLENGTH) -DGSNAP=1 $(POPCNT_CFLAGS) | |
2688 | 2689 | uniqscan_LDFLAGS = $(AM_LDFLAGS) $(STATIC_LDFLAG) |
2689 | 2690 | uniqscan_LDADD = $(PTHREAD_LIBS) $(ZLIB_LIBS) $(BZLIB_LIBS) |
2690 | 2691 | dist_uniqscan_SOURCES = $(UNIQSCAN_FILES) |
2723 | 2724 | getopt.c getopt1.c getopt.h uniqscan.c |
2724 | 2725 | |
2725 | 2726 | uniqscanl_CC = $(PTHREAD_CC) |
2726 | uniqscanl_CFLAGS = $(AM_CFLAGS) $(PTHREAD_CFLAGS) -DTARGET=\"$(target)\" -DGMAPDB=\"$(GMAPDB)\" -DMAX_READLENGTH=$(MAX_READLENGTH) -DGSNAP=1 -DLARGE_GENOMES=1 $(POPCNT_CFLAGS) | |
2727 | uniqscanl_CFLAGS = $(AM_CFLAGS) $(PTHREAD_CFLAGS) -DTARGET=\"$(target)\" -DGMAPDB=\"$(GMAPDB)\" -DMAX_STACK_READLENGTH=$(MAX_STACK_READLENGTH) -DGSNAP=1 -DLARGE_GENOMES=1 $(POPCNT_CFLAGS) | |
2727 | 2728 | uniqscanl_LDFLAGS = $(AM_LDFLAGS) $(STATIC_LDFLAG) |
2728 | 2729 | uniqscanl_LDADD = $(PTHREAD_LIBS) $(ZLIB_LIBS) $(BZLIB_LIBS) |
2729 | 2730 | dist_uniqscanl_SOURCES = $(UNIQSCANL_FILES) |
2927 | 2928 | |
2928 | 2929 | .SUFFIXES: |
2929 | 2930 | .SUFFIXES: .c .lo .o .obj |
2930 | $(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(am__configure_deps) | |
2931 | $(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(am__configure_deps) | |
2931 | 2932 | @for dep in $?; do \ |
2932 | 2933 | case '$(am__configure_deps)' in \ |
2933 | 2934 | *$$dep*) \ |
2951 | 2952 | $(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) |
2952 | 2953 | cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh |
2953 | 2954 | |
2954 | $(top_srcdir)/configure: $(am__configure_deps) | |
2955 | $(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps) | |
2955 | 2956 | cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh |
2956 | $(ACLOCAL_M4): $(am__aclocal_m4_deps) | |
2957 | $(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps) | |
2957 | 2958 | cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh |
2958 | 2959 | $(am__aclocal_m4_deps): |
2959 | 2960 | |
2964 | 2965 | stamp-h1: $(srcdir)/config.h.in $(top_builddir)/config.status |
2965 | 2966 | @rm -f stamp-h1 |
2966 | 2967 | cd $(top_builddir) && $(SHELL) ./config.status src/config.h |
2967 | $(srcdir)/config.h.in: $(am__configure_deps) | |
2968 | $(srcdir)/config.h.in: @MAINTAINER_MODE_TRUE@ $(am__configure_deps) | |
2968 | 2969 | ($(am__cd) $(top_srcdir) && $(AUTOHEADER)) |
2969 | 2970 | rm -f stamp-h1 |
2970 | 2971 | touch $@ |
41485 | 41486 | # intlistdef.h intlist.c intlist.h listdef.h list.c list.h \ |
41486 | 41487 | # univinterval.c univinterval.h interval.c interval.h \ |
41487 | 41488 | # uintlist.c uintlist.h \ |
41488 | # chrom.c chrom.h stopwatch.c stopwatch.h access.c access.h \ | |
41489 | # chrom.c chrom.h stopwatch.c stopwatch.h semaphore.c semaphore.h access.c access.h \ | |
41489 | 41490 | # iit-read-univ.c iit-read-univ.h iitdef.h iit-read.c iit-read.h \ |
41490 | 41491 | # filestring.c filestring.h \ |
41491 | 41492 | # md5.c md5.h complement.h bzip2.c bzip2.h sequence.c sequence.h \ |
41492 | 41493 | # genome.c genome.h \ |
41493 | 41494 | # genomicpos.c genomicpos.h \ |
41494 | # chrnum.c chrnum.h chrsubset.c chrsubset.h \ | |
41495 | # chrnum.c chrnum.h \ | |
41495 | 41496 | # maxent.c maxent.h \ |
41496 | 41497 | # branchpoint.c branchpoint.h \ |
41497 | 41498 | # parserange.c parserange.h datadir.c datadir.h getopt.c getopt1.c getopt.h splicing-score.c |
0 | static char rcsid[] = "$Id: atoi.c 195988 2016-08-08 19:29:00Z twu $"; | |
0 | static char rcsid[] = "$Id: atoi.c 195989 2016-08-08 21:42:24Z twu $"; | |
1 | 1 | #ifdef HAVE_CONFIG_H |
2 | 2 | #include <config.h> |
3 | 3 | #endif |
0 | static char rcsid[] = "$Id: bytecoding.c 179281 2015-11-20 00:10:35Z twu $"; | |
0 | static char rcsid[] = "$Id: bytecoding.c 196402 2016-08-16 14:29:06Z twu $"; | |
1 | 1 | #ifdef HAVE_CONFIG_H |
2 | 2 | #include <config.h> |
3 | 3 | #endif |
652 | 652 | /* return exceptions[highi + 1]; */ |
653 | 653 | |
654 | 654 | fprintf(stderr,"Bytecoding_read should have found index %u as an exception, but failed\n",key); |
655 | fprintf(stderr,"One possible cause is a corrupted shared memory segment, if GSNAP has exited abnormally\n"); | |
656 | fprintf(stderr,"Please do 'ipcs -m' and then 'ipcrm -m' on each of those segments\n"); | |
655 | 657 | abort(); |
656 | 658 | } |
657 | 659 | } |
711 | 713 | /* return exceptions[highi + 1]; */ |
712 | 714 | |
713 | 715 | fprintf(stderr,"Bytecoding_read_wguide should have found index %u as an exception, but failed\n",key); |
716 | fprintf(stderr,"One possible cause is a corrupted shared memory segment, if GSNAP has exited abnormally\n"); | |
717 | fprintf(stderr,"Please do 'ipcs -m' and then 'ipcrm -m' on each of those segments\n"); | |
714 | 718 | abort(); |
715 | 719 | } |
716 | 720 | } |
765 | 769 | /* return exceptions[highi + 1]; */ |
766 | 770 | |
767 | 771 | fprintf(stderr,"Bytecoding_lcp should have found index %u as an exception, but failed\n",key); |
772 | fprintf(stderr,"One possible cause is a corrupted shared memory segment, if GSNAP has exited abnormally\n"); | |
773 | fprintf(stderr,"Please do 'ipcs -m' and then 'ipcrm -m' on each of those segments\n"); | |
768 | 774 | abort(); |
769 | 775 | } |
770 | 776 | } |
846 | 852 | /* return exceptions[highi + 1]; */ |
847 | 853 | |
848 | 854 | fprintf(stderr,"Bytecoding_lcpchilddc_child_up should have found index %u as an exception, but failed\n",key); |
855 | fprintf(stderr,"One possible cause is a corrupted shared memory segment, if GSNAP has exited abnormally\n"); | |
856 | fprintf(stderr,"Please do 'ipcs -m' and then 'ipcrm -m' on each of those segments\n"); | |
849 | 857 | abort(); |
850 | 858 | } |
851 | 859 | } |
907 | 915 | /* return exceptions[highi + 1]; */ |
908 | 916 | |
909 | 917 | fprintf(stderr,"Bytecoding_lcpchilddc_child_next should have found index %u as an exception, but failed\n",key); |
918 | fprintf(stderr,"One possible cause is a corrupted shared memory segment, if GSNAP has exited abnormally\n"); | |
919 | fprintf(stderr,"Please do 'ipcs -m' and then 'ipcrm -m' on each of those segments\n"); | |
910 | 920 | abort(); |
911 | 921 | } |
912 | 922 | } |
974 | 984 | /* return exceptions[highi + 1]; */ |
975 | 985 | |
976 | 986 | fprintf(stderr,"Bytecoding_lcpchilddc_lcp_next should have found index %u as an exception, but failed\n",key); |
987 | fprintf(stderr,"One possible cause is a corrupted shared memory segment, if GSNAP has exited abnormally\n"); | |
988 | fprintf(stderr,"Please do 'ipcs -m' and then 'ipcrm -m' on each of those segments\n"); | |
977 | 989 | abort(); |
978 | 990 | } |
979 | 991 | } |
1045 | 1057 | /* return exceptions[highi + 1]; */ |
1046 | 1058 | |
1047 | 1059 | fprintf(stderr,"Bytecoding_lcpchilddcn_child_up should have found index %u as an exception, but failed\n",key); |
1060 | fprintf(stderr,"One possible cause is a corrupted shared memory segment, if GSNAP has exited abnormally\n"); | |
1061 | fprintf(stderr,"Please do 'ipcs -m' and then 'ipcrm -m' on each of those segments\n"); | |
1048 | 1062 | abort(); |
1049 | 1063 | } |
1050 | 1064 | } |
1117 | 1131 | /* return exceptions[highi + 1]; */ |
1118 | 1132 | |
1119 | 1133 | fprintf(stderr,"Bytecoding_lcpchilddcn_child_next should have found index %u as an exception, but failed\n",key); |
1134 | fprintf(stderr,"One possible cause is a corrupted shared memory segment, if GSNAP has exited abnormally\n"); | |
1135 | fprintf(stderr,"Please do 'ipcs -m' and then 'ipcrm -m' on each of those segments\n"); | |
1120 | 1136 | abort(); |
1121 | 1137 | } |
1122 | 1138 | } |
0 | static char rcsid[] = "$Id: cmet.c 195988 2016-08-08 19:29:00Z twu $"; | |
0 | static char rcsid[] = "$Id: cmet.c 195989 2016-08-08 21:42:24Z twu $"; | |
1 | 1 | #ifdef HAVE_CONFIG_H |
2 | 2 | #include <config.h> |
3 | 3 | #endif |
0 | /* $Id: comp.h 195763 2016-08-04 01:37:20Z twu $ */ | |
0 | /* $Id: comp.h 195548 2016-08-02 17:18:50Z twu $ */ | |
1 | 1 | #ifndef COMP_INCLUDED |
2 | 2 | #define COMP_INCLUDED |
3 | 3 |
0 | -*- mode: compilation; default-directory: "~/bioinfo/gmap/trunk/src/" -*- | |
1 | Compilation started at Mon Dec 14 14:13:20 | |
2 | ||
3 | make -k gsnap.sse42 | |
4 | /gne/home/twu/bin/gcc -DHAVE_CONFIG_H -I. -pthread -DTARGET=\"x86_64-unknown-linux-gnu\" -DGMAPDB=\"/gne/research/data/bioinfo/gmap/data/genomes\" -DMAX_READLENGTH=300 -DGSNAP=1 -DHAVE_SSE2=1 -DHAVE_SSSE3=1 -DHAVE_SSE4_1=1 -DHAVE_SSE4_2=1 -msse2 -mssse3 -msse4.1 -msse4.2 -mpopcnt -g -Wall -Wextra -DCHECK_ASSERTIONS=1 -MT gsnap_sse42-dynprog_simd.o -MD -MP -MF .deps/gsnap_sse42-dynprog_simd.Tpo -c -o gsnap_sse42-dynprog_simd.o `test -f 'dynprog_simd.c' || echo './'`dynprog_simd.c | |
5 | dynprog_simd.c: In function ‘Dynprog_simd_8’: | |
6 | dynprog_simd.c:2143:4: warning: array subscript has type ‘char’ [-Wchar-subscripts] | |
7 | na2 = revp ? nt_to_int_array[gsequence[1-c]] : nt_to_int_array[gsequence[c-1]]; | |
8 | ^ | |
9 | dynprog_simd.c:2143:4: warning: array subscript has type ‘char’ [-Wchar-subscripts] | |
10 | dynprog_simd.c:2144:4: warning: array subscript has type ‘char’ [-Wchar-subscripts] | |
11 | na2_alt = revp ? nt_to_int_array[gsequence_alt[1-c]] : nt_to_int_array[gsequence_alt[c-1 | |
12 | ^ | |
13 | dynprog_simd.c:2144:4: warning: array subscript has type ‘char’ [-Wchar-subscripts] | |
14 | dynprog_simd.c:2347:4: warning: array subscript has type ‘char’ [-Wchar-subscripts] | |
15 | na2 = revp ? nt_to_int_array[gsequence[1-c]] : nt_to_int_array[gsequence[c-1]]; | |
16 | ^ | |
17 | dynprog_simd.c:2347:4: warning: array subscript has type ‘char’ [-Wchar-subscripts] | |
18 | dynprog_simd.c:2348:4: warning: array subscript has type ‘char’ [-Wchar-subscripts] | |
19 | na2_alt = revp ? nt_to_int_array[gsequence_alt[1-c]] : nt_to_int_array[gsequence_alt[c-1 | |
20 | ^ | |
21 | dynprog_simd.c:2348:4: warning: array subscript has type ‘char’ [-Wchar-subscripts] | |
22 | dynprog_simd.c:1942:33: warning: variable ‘extend_ladder’ set but not used [-Wunused-but-set-variable] | |
23 | __m128i gap_open, gap_extend, extend_ladder, complement_dummy; | |
24 | ^ | |
25 | dynprog_simd.c: In function ‘Dynprog_simd_8_upper’: | |
26 | dynprog_simd.c:2770:4: warning: array subscript has type ‘char’ [-Wchar-subscripts] | |
27 | na2 = revp ? nt_to_int_array[gsequence[1-c]] : nt_to_int_array[gsequence[c-1]]; | |
28 | ^ | |
29 | dynprog_simd.c:2770:4: warning: array subscript has type ‘char’ [-Wchar-subscripts] | |
30 | dynprog_simd.c:2771:4: warning: array subscript has type ‘char’ [-Wchar-subscripts] | |
31 | na2_alt = revp ? nt_to_int_array[gsequence_alt[1-c]] : nt_to_int_array[gsequence_alt[c-1 | |
32 | ^ | |
33 | dynprog_simd.c:2771:4: warning: array subscript has type ‘char’ [-Wchar-subscripts] | |
34 | dynprog_simd.c:2896:4: warning: array subscript has type ‘char’ [-Wchar-subscripts] | |
35 | na2 = revp ? nt_to_int_array[gsequence[1-c]] : nt_to_int_array[gsequence[c-1]]; | |
36 | ^ | |
37 | dynprog_simd.c:2896:4: warning: array subscript has type ‘char’ [-Wchar-subscripts] | |
38 | dynprog_simd.c:2897:4: warning: array subscript has type ‘char’ [-Wchar-subscripts] | |
39 | na2_alt = revp ? nt_to_int_array[gsequence_alt[1-c]] : nt_to_int_array[gsequence_alt[c-1 | |
40 | ^ | |
41 | dynprog_simd.c:2897:4: warning: array subscript has type ‘char’ [-Wchar-subscripts] | |
42 | dynprog_simd.c:2632:8: warning: unused variable ‘na2_single’ [-Wunused-variable] | |
43 | char na2_single; | |
44 | ^ | |
45 | dynprog_simd.c:2626:70: warning: unused variable ‘pairscore’ [-Wunused-variable] | |
46 | Score8_T *pairscores[5], *pairscores_std_ptr, *pairscores_alt_ptr, pairscore; | |
47 | ^ | |
48 | dynprog_simd.c: In function ‘Dynprog_simd_8_lower’: | |
49 | dynprog_simd.c:3238:3: error: ‘extend_ladder’ undeclared (first use in this function) | |
50 | extend_ladder = _mm_setr_epi8(0,extend,2*extend,3*extend,4*extend,5*extend,6*extend,7*ext | |
51 | ^ | |
52 | dynprog_simd.c:3238:3: note: each undeclared identifier is reported only once for each function it appears in | |
53 | dynprog_simd.c:3267:4: warning: array subscript has type ‘char’ [-Wchar-subscripts] | |
54 | na1 = revp ? nt_to_int_array[rsequence[1-r]] : nt_to_int_array[rsequence[r-1]]; | |
55 | ^ | |
56 | dynprog_simd.c:3267:4: warning: array subscript has type ‘char’ [-Wchar-subscripts] | |
57 | dynprog_simd.c:3389:4: warning: array subscript has type ‘char’ [-Wchar-subscripts] | |
58 | na1 = revp ? nt_to_int_array[rsequence[1-r]] : nt_to_int_array[rsequence[r-1]]; | |
59 | ^ | |
60 | dynprog_simd.c:3389:4: warning: array subscript has type ‘char’ [-Wchar-subscripts] | |
61 | dynprog_simd.c:3089:8: warning: unused variable ‘na2_single’ [-Wunused-variable] | |
62 | char na2_single; | |
63 | ^ | |
64 | dynprog_simd.c:3083:45: warning: unused variable ‘pairscore’ [-Wunused-variable] | |
65 | Score8_T *pairscores[5], *pairscores_ptr, pairscore; | |
66 | ^ | |
67 | dynprog_simd.c: In function ‘Dynprog_simd_16’: | |
68 | dynprog_simd.c:3739:4: warning: array subscript has type ‘char’ [-Wchar-subscripts] | |
69 | na2 = revp ? nt_to_int_array[gsequence[1-c]] : nt_to_int_array[gsequence[c-1]]; | |
70 | ^ | |
71 | dynprog_simd.c:3739:4: warning: array subscript has type ‘char’ [-Wchar-subscripts] | |
72 | dynprog_simd.c:3740:4: warning: array subscript has type ‘char’ [-Wchar-subscripts] | |
73 | na2_alt = revp ? nt_to_int_array[gsequence_alt[1-c]] : nt_to_int_array[gsequence_alt[c-1 | |
74 | ^ | |
75 | dynprog_simd.c:3740:4: warning: array subscript has type ‘char’ [-Wchar-subscripts] | |
76 | dynprog_simd.c:3923:4: warning: array subscript has type ‘char’ [-Wchar-subscripts] | |
77 | na2 = revp ? nt_to_int_array[gsequence[1-c]] : nt_to_int_array[gsequence[c-1]]; | |
78 | ^ | |
79 | dynprog_simd.c:3923:4: warning: array subscript has type ‘char’ [-Wchar-subscripts] | |
80 | dynprog_simd.c:3924:4: warning: array subscript has type ‘char’ [-Wchar-subscripts] | |
81 | na2_alt = revp ? nt_to_int_array[gsequence_alt[1-c]] : nt_to_int_array[gsequence_alt[c-1 | |
82 | ^ | |
83 | dynprog_simd.c:3924:4: warning: array subscript has type ‘char’ [-Wchar-subscripts] | |
84 | dynprog_simd.c:3563:33: warning: variable ‘extend_ladder’ set but not used [-Wunused-but-set-variable] | |
85 | __m128i gap_open, gap_extend, extend_ladder, complement_dummy; | |
86 | ^ | |
87 | dynprog_simd.c: In function ‘Dynprog_simd_16_upper’: | |
88 | dynprog_simd.c:4259:3: error: ‘extend_ladder’ undeclared (first use in this function) | |
89 | extend_ladder = _mm_setr_epi16(0,extend,2*extend,3*extend,4*extend,5*extend,6*extend,7*ex | |
90 | ^ | |
91 | dynprog_simd.c:4284:4: warning: array subscript has type ‘char’ [-Wchar-subscripts] | |
92 | na2 = revp ? nt_to_int_array[gsequence[1-c]] : nt_to_int_array[gsequence[c-1]]; | |
93 | ^ | |
94 | dynprog_simd.c:4284:4: warning: array subscript has type ‘char’ [-Wchar-subscripts] | |
95 | dynprog_simd.c:4285:4: warning: array subscript has type ‘char’ [-Wchar-subscripts] | |
96 | na2_alt = revp ? nt_to_int_array[gsequence_alt[1-c]] : nt_to_int_array[gsequence_alt[c-1 | |
97 | ^ | |
98 | dynprog_simd.c:4285:4: warning: array subscript has type ‘char’ [-Wchar-subscripts] | |
99 | dynprog_simd.c:4381:4: warning: array subscript has type ‘char’ [-Wchar-subscripts] | |
100 | na2 = revp ? nt_to_int_array[gsequence[1-c]] : nt_to_int_array[gsequence[c-1]]; | |
101 | ^ | |
102 | dynprog_simd.c:4381:4: warning: array subscript has type ‘char’ [-Wchar-subscripts] | |
103 | dynprog_simd.c:4382:4: warning: array subscript has type ‘char’ [-Wchar-subscripts] | |
104 | na2_alt = revp ? nt_to_int_array[gsequence_alt[1-c]] : nt_to_int_array[gsequence_alt[c-1 | |
105 | ^ | |
106 | dynprog_simd.c:4382:4: warning: array subscript has type ‘char’ [-Wchar-subscripts] | |
107 | dynprog_simd.c:4158:8: warning: unused variable ‘na2_single’ [-Wunused-variable] | |
108 | char na2_single; | |
109 | ^ | |
110 | dynprog_simd.c:4152:71: warning: unused variable ‘pairscore’ [-Wunused-variable] | |
111 | Score16_T *pairscores[5], *pairscores_std_ptr, *pairscores_alt_ptr, pairscore; | |
112 | ^ | |
113 | dynprog_simd.c: In function ‘Dynprog_simd_16_lower’: | |
114 | dynprog_simd.c:4675:3: error: ‘extend_ladder’ undeclared (first use in this function) | |
115 | extend_ladder = _mm_setr_epi16(0,extend,2*extend,3*extend,4*extend,5*extend,6*extend,7*ex | |
116 | ^ | |
117 | dynprog_simd.c:4699:4: warning: array subscript has type ‘char’ [-Wchar-subscripts] | |
118 | na1 = revp ? nt_to_int_array[rsequence[1-r]] : nt_to_int_array[rsequence[r-1]]; | |
119 | ^ | |
120 | dynprog_simd.c:4699:4: warning: array subscript has type ‘char’ [-Wchar-subscripts] | |
121 | dynprog_simd.c:4792:4: warning: array subscript has type ‘char’ [-Wchar-subscripts] | |
122 | na1 = revp ? nt_to_int_array[rsequence[1-r]] : nt_to_int_array[rsequence[r-1]]; | |
123 | ^ | |
124 | dynprog_simd.c:4792:4: warning: array subscript has type ‘char’ [-Wchar-subscripts] | |
125 | dynprog_simd.c:4542:8: warning: unused variable ‘na2_single’ [-Wunused-variable] | |
126 | char na2_single; | |
127 | ^ | |
128 | dynprog_simd.c:4536:46: warning: unused variable ‘pairscore’ [-Wunused-variable] | |
129 | Score16_T *pairscores[5], *pairscores_ptr, pairscore; | |
130 | ^ | |
131 | dynprog_simd.c: In function ‘Dynprog_traceback_8_lower’: | |
132 | dynprog_simd.c:5278:8: warning: unused variable ‘add_dashes_p’ [-Wunused-variable] | |
133 | bool add_dashes_p; | |
134 | ^ | |
135 | dynprog_simd.c:5275:11: warning: unused parameter ‘cdna_direction’ [-Wunused-parameter] | |
136 | int cdna_direction, bool watsonp, int dynprogindex) { | |
137 | ^ | |
138 | dynprog_simd.c: In function ‘Dynprog_traceback_16_lower’: | |
139 | dynprog_simd.c:5662:8: warning: unused variable ‘add_dashes_p’ [-Wunused-variable] | |
140 | bool add_dashes_p; | |
141 | ^ | |
142 | dynprog_simd.c:5659:12: warning: unused parameter ‘cdna_direction’ [-Wunused-parameter] | |
143 | int cdna_direction, bool watsonp, int dynprogindex) { | |
144 | ^ | |
145 | dynprog_simd.c: At top level: | |
146 | dynprog_simd.c:1:13: warning: ‘rcsid’ defined but not used [-Wunused-variable] | |
147 | static char rcsid[] = "$Id: dynprog_simd.c 146623 2014-09-02 21:31:32Z twu $"; | |
148 | ^ | |
149 | dynprog_simd.c:510:1: warning: ‘Directions8_print’ defined but not used [-Wunused-function] | |
150 | Directions8_print (Direction8_T **directions_nogap, Direction8_T **directions_Egap, Directi | |
151 | ^ | |
152 | dynprog_simd.c:604:1: warning: ‘Directions8_print_ud’ defined but not used [-Wunused-function] | |
153 | Directions8_print_ud (Direction8_T **directions_nogap, Direction8_T **directions_Egap, | |
154 | ^ | |
155 | dynprog_simd.c:713:1: warning: ‘Directions16_print’ defined but not used [-Wunused-function] | |
156 | Directions16_print (Direction16_T **directions_nogap, Direction16_T **directions_Egap, Dire | |
157 | ^ | |
158 | dynprog_simd.c:807:1: warning: ‘Directions16_print_ud’ defined but not used [-Wunused-function] | |
159 | Directions16_print_ud (Direction16_T **directions_nogap, Direction16_T **directions_Egap, | |
160 | ^ | |
161 | make: *** [gsnap_sse42-dynprog_simd.o] Error 1 | |
162 | make: Target `gsnap.sse42' not remade because of errors. | |
163 | ||
164 | Compilation exited abnormally with code 2 at Mon Dec 14 14:13:23 |
0 | static char rcsid[] = "$Id: filestring.c 195969 2016-08-08 17:01:27Z twu $"; | |
0 | static char rcsid[] = "$Id: filestring.c 196273 2016-08-12 15:15:06Z twu $"; | |
1 | 1 | #ifdef HAVE_CONFIG_H |
2 | 2 | #include <config.h> |
3 | 3 | #endif |
0 | static char rcsid[] = "$Id: genome_sites.c 195749 2016-08-03 23:35:09Z twu $"; | |
0 | static char rcsid[] = "$Id: genome_sites.c 196273 2016-08-12 15:15:06Z twu $"; | |
1 | 1 | #ifdef HAVE_CONFIG_H |
2 | 2 | #include <config.h> |
3 | 3 | #endif |
0 | static char rcsid[] = "$Id: gmap.c 193877 2016-07-12 02:46:33Z twu $"; | |
0 | static char rcsid[] = "$Id: gmap.c 196403 2016-08-16 14:33:56Z twu $"; | |
1 | 1 | #ifdef HAVE_CONFIG_H |
2 | 2 | #include <config.h> |
3 | 3 | #endif |
381 | 381 | |
382 | 382 | /* GFF3 */ |
383 | 383 | static bool gff3_separators_p = true; |
384 | static bool gff3_phase_swap_p = false; | |
384 | 385 | |
385 | 386 | /* SAM */ |
386 | 387 | /* Applicable to PMAP? */ |
556 | 557 | {"require-splicedir", no_argument, 0, 0}, /* require_splicedir_p */ |
557 | 558 | |
558 | 559 | {"gff3-add-separators", required_argument, 0, 0}, /* gff3_separators_p */ |
560 | {"gff3-swap-phase", required_argument, 0, 0}, /* gff3_phase_swap_p */ | |
559 | 561 | |
560 | 562 | #ifndef PMAP |
561 | 563 | {"quality-protocol", required_argument, 0, 0}, /* quality_shift */ |
5297 | 5299 | split_output_root = optarg; |
5298 | 5300 | } else if (!strcmp(long_name,"append-output")) { |
5299 | 5301 | appendp = true; |
5302 | ||
5300 | 5303 | } else if (!strcmp(long_name,"gff3-add-separators")) { |
5301 | 5304 | if (!strcmp(optarg,"1")) { |
5302 | 5305 | gff3_separators_p = true; |
5304 | 5307 | gff3_separators_p = false; |
5305 | 5308 | } else { |
5306 | 5309 | fprintf(stderr,"--gff3-add-separators flag must be 0 or 1\n"); |
5310 | return 9; | |
5311 | } | |
5312 | ||
5313 | } else if (!strcmp(long_name,"gff3-swap-phase")) { | |
5314 | if (!strcmp(optarg,"1")) { | |
5315 | gff3_phase_swap_p = true; | |
5316 | } else if (!strcmp(optarg,"0")) { | |
5317 | gff3_phase_swap_p = false; | |
5318 | } else { | |
5319 | fprintf(stderr,"--gff3-swap-phase flag must be 0 or 1\n"); | |
5307 | 5320 | return 9; |
5308 | 5321 | } |
5309 | 5322 | |
6574 | 6587 | Pair_setup(trim_mismatch_score,trim_indel_score,gff3_separators_p,sam_insert_0M_p, |
6575 | 6588 | force_xs_direction_p,md_lowercase_variant_p, |
6576 | 6589 | /*snps_p*/genomecomp_alt ? true : false, |
6577 | /*print_nsnpdiffs_p*/genomecomp_alt ? true : false,genomelength); | |
6590 | /*print_nsnpdiffs_p*/genomecomp_alt ? true : false,genomelength, | |
6591 | gff3_phase_swap_p); | |
6578 | 6592 | Stage3_setup(/*splicingp*/novelsplicingp == true || knownsplicingp == true,novelsplicingp, |
6579 | 6593 | require_splicedir_p,splicing_iit,splicing_divint_crosstable, |
6580 | 6594 | donor_typeint,acceptor_typeint,splicesites,altlocp,alias_starts,alias_ends, |
7183 | 7197 | fprintf(stdout,"\ |
7184 | 7198 | --gff3-add-separators=INT Whether to add a ### separator after each query sequence\n\ |
7185 | 7199 | Values: 0 (no), 1 (yes, default)\n\ |
7200 | --gff3-swap-phase=INT Whether to swap phase (0 => 0, 1 => 2, 2 => 1) in gff3_gene format\n\ | |
7201 | Needed by some analysis programs, but deviates from GFF3 specification\n\ | |
7202 | Values: 0 (no, default), 1 (yes)\n\ | |
7186 | 7203 | "); |
7187 | 7204 | fprintf(stdout,"\n"); |
7188 | 7205 |
0 | static char rcsid[] = "$Id: gsnap.c 195760 2016-08-04 00:12:04Z twu $"; | |
0 | static char rcsid[] = "$Id: gsnap.c 196438 2016-08-16 20:23:27Z twu $"; | |
1 | 1 | #ifdef HAVE_CONFIG_H |
2 | 2 | #include <config.h> |
3 | 3 | #endif |
120 | 120 | |
121 | 121 | #define MIN_INDEXDB_SIZE_THRESHOLD 100 |
122 | 122 | |
123 | #define MAX_FLOORS_READLENGTH 300 | |
123 | 124 | #define MAX_QUERYLENGTH_FOR_ALLOC 100000 |
124 | 125 | #define MAX_GENOMICLENGTH_FOR_ALLOC 1000000 |
125 | 126 | |
751 | 752 | genomedir = Datadir_find_genomedir(/*user_genomedir*/NULL); |
752 | 753 | fprintf(stdout,"Default gmap directory (environment): %s\n",genomedir); |
753 | 754 | FREE(genomedir); |
754 | fprintf(stdout,"Maximum read length: %d\n",MAX_READLENGTH); | |
755 | fprintf(stdout,"Maximum stack read length: %d\n",MAX_STACK_READLENGTH); | |
755 | 756 | fprintf(stdout,"Thomas D. Wu, Genentech, Inc.\n"); |
756 | 757 | fprintf(stdout,"Contact: twu@gene.com\n"); |
757 | 758 | fprintf(stdout,"\n"); |
1085 | 1086 | cellpool = Cellpool_new(); |
1086 | 1087 | worker_stopwatch = (timingp == true) ? Stopwatch_new() : (Stopwatch_T) NULL; |
1087 | 1088 | |
1088 | floors_array = (Floors_T *) CALLOC(MAX_READLENGTH+1,sizeof(Floors_T)); | |
1089 | floors_array = (Floors_T *) CALLOC(MAX_FLOORS_READLENGTH+1,sizeof(Floors_T)); | |
1089 | 1090 | |
1090 | 1091 | /* Except_stack_create(); -- requires pthreads */ |
1091 | 1092 | |
1183 | 1184 | |
1184 | 1185 | /* Except_stack_destroy(); -- requires pthreads */ |
1185 | 1186 | |
1186 | for (i = 0; i <= MAX_READLENGTH; i++) { | |
1187 | for (i = 0; i <= MAX_FLOORS_READLENGTH; i++) { | |
1187 | 1188 | if (floors_array[i] != NULL) { |
1188 | 1189 | Floors_free_keep(&(floors_array[i])); |
1189 | 1190 | } |
1259 | 1260 | cellpool = Cellpool_new(); |
1260 | 1261 | worker_stopwatch = (timingp == true) ? Stopwatch_new() : (Stopwatch_T) NULL; |
1261 | 1262 | |
1262 | floors_array = (Floors_T *) CALLOC(MAX_READLENGTH+1,sizeof(Floors_T)); | |
1263 | floors_array = (Floors_T *) CALLOC(MAX_FLOORS_READLENGTH+1,sizeof(Floors_T)); | |
1263 | 1264 | |
1264 | 1265 | Except_stack_create(); |
1265 | 1266 | |
1359 | 1360 | |
1360 | 1361 | Except_stack_destroy(); |
1361 | 1362 | |
1362 | for (i = 0; i <= MAX_READLENGTH; i++) { | |
1363 | for (i = 0; i <= MAX_FLOORS_READLENGTH; i++) { | |
1363 | 1364 | if (floors_array[i] != NULL) { |
1364 | 1365 | Floors_free_keep(&(floors_array[i])); |
1365 | 1366 | } |
3299 | 3300 | Pair_setup(trim_mismatch_score,trim_indel_score,/*gff3_separators_p*/false,sam_insert_0M_p, |
3300 | 3301 | force_xs_direction_p,md_lowercase_variant_p, |
3301 | 3302 | /*snps_p*/snps_iit ? true : false,print_nsnpdiffs_p, |
3302 | Univ_IIT_genomelength(chromosome_iit,/*with_circular_alias*/false)); | |
3303 | Univ_IIT_genomelength(chromosome_iit,/*with_circular_alias*/false), | |
3304 | /*gff3_phase_swap_p*/false); | |
3303 | 3305 | Stage3_setup(/*splicingp*/novelsplicingp == true || knownsplicingp == true,novelsplicingp, |
3304 | 3306 | /*require_splicedir_p*/true,splicing_iit,splicing_divint_crosstable, |
3305 | 3307 | donor_typeint,acceptor_typeint,splicesites,altlocp,alias_starts,alias_ends, |
3337 | 3339 | nullgap,maxpeelback,maxpeelback_distalmedial, |
3338 | 3340 | extramaterial_end,extramaterial_paired,gmap_mode, |
3339 | 3341 | trigger_score_for_gmap,gmap_allowance,max_gmap_pairsearch, |
3340 | max_gmap_terminal,max_gmap_improvement,antistranded_penalty); | |
3342 | max_gmap_terminal,max_gmap_improvement,antistranded_penalty, | |
3343 | MAX_FLOORS_READLENGTH); | |
3341 | 3344 | Substring_setup(print_nsnpdiffs_p,print_snplabels_p, |
3342 | 3345 | show_refdiff_p,snps_iit,snps_divint_crosstable, |
3343 | 3346 | genes_iit,genes_divint_crosstable, |
0 | static char rcsid[] = "$Id: indel.c 193229 2016-06-30 22:31:10Z twu $"; | |
0 | static char rcsid[] = "$Id: indel.c 196431 2016-08-16 20:19:22Z twu $"; | |
1 | 1 | #ifdef HAVE_CONFIG_H |
2 | 2 | #include <config.h> |
3 | 3 | #endif |
44 | 44 | #endif |
45 | 45 | int nmismatches_left, nmismatches_right; |
46 | 46 | int best_sum, sum, nmismatches_lefti, nmismatches_righti, lefti, righti; |
47 | ||
47 | int *mismatch_positions_left, *mismatch_positions_right; | |
48 | 48 | |
49 | 49 | #ifdef HAVE_ALLOCA |
50 | int *mismatch_positions_left = (int *) ALLOCA(querylength * sizeof(int)); | |
51 | int *mismatch_positions_right = (int *) ALLOCA(querylength * sizeof(int)); | |
52 | #else | |
53 | int mismatch_positions_left[MAX_READLENGTH], mismatch_positions_right[MAX_READLENGTH]; | |
54 | ||
55 | if (max_mismatches_allowed > MAX_READLENGTH) { | |
56 | max_mismatches_allowed = MAX_READLENGTH; | |
57 | } | |
58 | #endif | |
59 | ||
50 | if (querylength <= MAX_STACK_READLENGTH) { | |
51 | mismatch_positions_left = (int *) ALLOCA(querylength * sizeof(int)); | |
52 | mismatch_positions_right = (int *) ALLOCA(querylength * sizeof(int)); | |
53 | } else { | |
54 | mismatch_positions_left = (int *) MALLOC(querylength * sizeof(int)); | |
55 | mismatch_positions_right = (int *) MALLOC(querylength * sizeof(int)); | |
56 | } | |
57 | #else | |
58 | mismatch_positions_left = (int *) MALLOC(querylength * sizeof(int)); | |
59 | mismatch_positions_right = (int *) MALLOC(querylength * sizeof(int)); | |
60 | #endif | |
61 | ||
62 | if (max_mismatches_allowed > querylength) { | |
63 | max_mismatches_allowed = querylength; | |
64 | } | |
60 | 65 | |
61 | 66 | /* query has insertion. Get |indels| less from genome; trim from left. */ |
62 | 67 | /* left = ptr->diagonal - querylength; */ |
167 | 172 | } |
168 | 173 | debug2(printf("\n")); |
169 | 174 | |
175 | #ifdef HAVE_ALLOCA | |
176 | if (querylength <= MAX_STACK_READLENGTH) { | |
177 | FREEA(mismatch_positions_left); | |
178 | FREEA(mismatch_positions_right); | |
179 | } else { | |
180 | FREE(mismatch_positions_left); | |
181 | FREE(mismatch_positions_right); | |
182 | } | |
183 | #else | |
184 | FREE(mismatch_positions_left); | |
185 | FREE(mismatch_positions_right); | |
186 | #endif | |
187 | ||
170 | 188 | *best_nmismatches_i = nmismatches_lefti; |
171 | 189 | *best_nmismatches_j = nmismatches_righti; |
172 | 190 | |
202 | 220 | #endif |
203 | 221 | int nmismatches_left, nmismatches_right, nmismatches_lefti, nmismatches_righti; |
204 | 222 | int best_sum, sum, lefti, righti; |
223 | int *mismatch_positions_left, *mismatch_positions_right; | |
205 | 224 | |
206 | 225 | #ifdef HAVE_ALLOCA |
207 | int *mismatch_positions_left = (int *) ALLOCA(querylength * sizeof(int)); | |
208 | int *mismatch_positions_right = (int *) ALLOCA(querylength * sizeof(int)); | |
209 | #else | |
210 | int mismatch_positions_left[MAX_READLENGTH], mismatch_positions_right[MAX_READLENGTH]; | |
211 | ||
212 | if (max_mismatches_allowed > MAX_READLENGTH) { | |
213 | max_mismatches_allowed = MAX_READLENGTH; | |
214 | } | |
215 | #endif | |
216 | ||
226 | if (querylength <= MAX_STACK_READLENGTH) { | |
227 | mismatch_positions_left = (int *) ALLOCA(querylength * sizeof(int)); | |
228 | mismatch_positions_right = (int *) ALLOCA(querylength * sizeof(int)); | |
229 | } else { | |
230 | mismatch_positions_left = (int *) MALLOC(querylength * sizeof(int)); | |
231 | mismatch_positions_right = (int *) MALLOC(querylength * sizeof(int)); | |
232 | } | |
233 | #else | |
234 | mismatch_positions_left = (int *) MALLOC(querylength * sizeof(int)); | |
235 | mismatch_positions_right = (int *) MALLOC(querylength * sizeof(int)); | |
236 | #endif | |
237 | ||
238 | if (max_mismatches_allowed > querylength) { | |
239 | max_mismatches_allowed = querylength; | |
240 | } | |
217 | 241 | |
218 | 242 | /* query has deletion. Get |indels| more from genome; add to right. */ |
219 | 243 | /* left = ptr->diagonal - querylength; */ |
318 | 342 | } |
319 | 343 | debug2(printf("\n")); |
320 | 344 | |
345 | #ifdef HAVE_ALLOCA | |
346 | if (querylength <= MAX_STACK_READLENGTH) { | |
347 | FREEA(mismatch_positions_left); | |
348 | FREEA(mismatch_positions_right); | |
349 | } else { | |
350 | FREE(mismatch_positions_left); | |
351 | FREE(mismatch_positions_right); | |
352 | } | |
353 | #else | |
354 | FREE(mismatch_positions_left); | |
355 | FREE(mismatch_positions_right); | |
356 | #endif | |
357 | ||
321 | 358 | *best_nmismatches_i = nmismatches_lefti; |
322 | 359 | *best_nmismatches_j = nmismatches_righti; |
323 | 360 | |
356 | 393 | int nmismatches_left, nmismatches_right; |
357 | 394 | int best_sum, sum, nmismatches_lefti, nmismatches_righti, lefti, righti; |
358 | 395 | int nmismatches1, nmismatches2; |
396 | int *mismatch_positions_left, *mismatch_positions_right; | |
359 | 397 | |
360 | 398 | #ifdef HAVE_ALLOCA |
361 | int *mismatch_positions_left = (int *) ALLOCA(querylength * sizeof(int)); | |
362 | int *mismatch_positions_right = (int *) ALLOCA(querylength * sizeof(int)); | |
363 | #else | |
364 | int mismatch_positions_left[MAX_READLENGTH], mismatch_positions_right[MAX_READLENGTH]; | |
399 | if (querylength <= MAX_STACK_READLENGTH) { | |
400 | mismatch_positions_left = (int *) ALLOCA(querylength * sizeof(int)); | |
401 | mismatch_positions_right = (int *) ALLOCA(querylength * sizeof(int)); | |
402 | } else { | |
403 | mismatch_positions_left = (int *) MALLOC(querylength * sizeof(int)); | |
404 | mismatch_positions_right = (int *) MALLOC(querylength * sizeof(int)); | |
405 | } | |
406 | #else | |
407 | mismatch_positions_left = (int *) MALLOC(querylength * sizeof(int)); | |
408 | mismatch_positions_right = (int *) MALLOC(querylength * sizeof(int)); | |
365 | 409 | #endif |
366 | 410 | |
367 | 411 | |
474 | 518 | } |
475 | 519 | debug2(printf("\n")); |
476 | 520 | |
521 | #ifdef HAVE_ALLOCA | |
522 | if (querylength <= MAX_STACK_READLENGTH) { | |
523 | FREEA(mismatch_positions_left); | |
524 | FREEA(mismatch_positions_right); | |
525 | } else { | |
526 | FREE(mismatch_positions_left); | |
527 | FREE(mismatch_positions_right); | |
528 | } | |
529 | #else | |
530 | FREE(mismatch_positions_left); | |
531 | FREE(mismatch_positions_right); | |
532 | #endif | |
533 | ||
477 | 534 | if (best_sum <= max_mismatches_allowed) { |
478 | 535 | if (plusp == true) { |
479 | 536 | query_indel_pos = best_indel_pos; |
522 | 579 | int nmismatches_left, nmismatches_right; |
523 | 580 | int best_sum, sum, nmismatches_lefti, nmismatches_righti, lefti, righti; |
524 | 581 | int nmismatches1, nmismatches2; |
582 | int *mismatch_positions_left, *mismatch_positions_right; | |
525 | 583 | |
526 | 584 | #ifdef HAVE_ALLOCA |
527 | int *mismatch_positions_left = (int *) ALLOCA(querylength * sizeof(int)); | |
528 | int *mismatch_positions_right = (int *) ALLOCA(querylength * sizeof(int)); | |
529 | #else | |
530 | int mismatch_positions_left[MAX_READLENGTH], mismatch_positions_right[MAX_READLENGTH]; | |
585 | if (querylength <= MAX_STACK_READLENGTH) { | |
586 | mismatch_positions_left = (int *) ALLOCA(querylength * sizeof(int)); | |
587 | mismatch_positions_right = (int *) ALLOCA(querylength * sizeof(int)); | |
588 | } else { | |
589 | mismatch_positions_left = (int *) MALLOC(querylength * sizeof(int)); | |
590 | mismatch_positions_right = (int *) MALLOC(querylength * sizeof(int)); | |
591 | } | |
592 | #else | |
593 | mismatch_positions_left = (int *) MALLOC(querylength * sizeof(int)); | |
594 | mismatch_positions_right = (int *) MALLOC(querylength * sizeof(int)); | |
531 | 595 | #endif |
532 | 596 | |
533 | 597 | |
636 | 700 | } |
637 | 701 | debug2(printf("\n")); |
638 | 702 | |
703 | #ifdef HAVE_ALLOCA | |
704 | if (querylength <= MAX_STACK_READLENGTH) { | |
705 | FREEA(mismatch_positions_left); | |
706 | FREEA(mismatch_positions_right); | |
707 | } else { | |
708 | FREE(mismatch_positions_left); | |
709 | FREE(mismatch_positions_right); | |
710 | } | |
711 | #else | |
712 | FREE(mismatch_positions_left); | |
713 | FREE(mismatch_positions_right); | |
714 | #endif | |
639 | 715 | |
640 | 716 | if (best_sum <= max_mismatches_allowed) { |
641 | 717 | if (plusp == true) { |
0 | static char rcsid[] = "$Id: mapq.c 184376 2016-02-16 23:39:30Z twu $"; | |
0 | static char rcsid[] = "$Id: mapq.c 196431 2016-08-16 20:19:22Z twu $"; | |
1 | 1 | #ifdef HAVE_CONFIG_H |
2 | 2 | #include <config.h> |
3 | 3 | #endif |
159 | 159 | |
160 | 160 | int nmismatches, i; |
161 | 161 | int alignlength; |
162 | int *mismatch_positions; | |
162 | 163 | |
163 | 164 | #ifdef HAVE_ALLOCA |
164 | int *mismatch_positions = (int *) ALLOCA((querylength+1) * sizeof(int)); | |
165 | #else | |
166 | int mismatch_positions[MAX_READLENGTH+1]; | |
165 | if (querylength <= MAX_STACK_READLENGTH) { | |
166 | mismatch_positions = (int *) ALLOCA((querylength+1) * sizeof(int)); | |
167 | } else { | |
168 | mismatch_positions = (int *) MALLOC((querylength+1) * sizeof(int)); | |
169 | } | |
170 | #else | |
171 | mismatch_positions = (int *) MALLOC((querylength+1) * sizeof(int)); | |
167 | 172 | #endif |
168 | 173 | |
169 | 174 | |
253 | 258 | |
254 | 259 | } |
255 | 260 | |
261 | #ifdef HAVE_ALLOCA | |
262 | if (querylength <= MAX_STACK_READLENGTH) { | |
263 | FREEA(mismatch_positions); | |
264 | } else { | |
265 | FREE(mismatch_positions); | |
266 | } | |
267 | #else | |
268 | FREE(mismatch_positions); | |
269 | #endif | |
270 | ||
256 | 271 | debug(printf("returning loglik %f\n",loglik)); |
257 | 272 | return loglik; |
258 | 273 | } |
0 | static char rcsid[] = "$Id: pair.c 195763 2016-08-04 01:37:20Z twu $"; | |
0 | static char rcsid[] = "$Id: pair.c 196403 2016-08-16 14:33:56Z twu $"; | |
1 | 1 | #ifdef HAVE_CONFIG_H |
2 | 2 | #include <config.h> |
3 | 3 | #endif |
148 | 148 | static bool print_nsnpdiffs_p; |
149 | 149 | static double genomelength; /* For BLAST E-value */ |
150 | 150 | |
151 | static bool gff3_phase_swap_p = true; | |
152 | ||
151 | 153 | |
152 | 154 | void |
153 | 155 | Pair_setup (int trim_mismatch_score_in, int trim_indel_score_in, |
154 | 156 | bool gff3_separators_p_in, bool sam_insert_0M_p_in, bool force_xs_direction_p_in, |
155 | 157 | bool md_lowercase_variant_p_in, bool snps_p_in, bool print_nsnpdiffs_p_in, |
156 | Univcoord_T genomelength_in) { | |
158 | Univcoord_T genomelength_in, bool gff3_phase_swap_p_in) { | |
157 | 159 | trim_mismatch_score = trim_mismatch_score_in; |
158 | 160 | trim_indel_score = trim_indel_score_in; |
159 | 161 | gff3_separators_p = gff3_separators_p_in; |
163 | 165 | snps_p = snps_p_in; |
164 | 166 | print_nsnpdiffs_p = print_nsnpdiffs_p_in; |
165 | 167 | genomelength = (double) genomelength_in; |
168 | gff3_phase_swap_p = gff3_phase_swap_p_in; | |
166 | 169 | |
167 | 170 | return; |
168 | 171 | } |
2448 | 2451 | } |
2449 | 2452 | } |
2450 | 2453 | |
2451 | FPRINTF(fp,"%d\t",cds_phase); /* 8: phase */ | |
2454 | if (gff3_phase_swap_p == true && cds_phase > 0) { | |
2455 | /* Some analysis programs want phase in gff3 to be different */ | |
2456 | FPRINTF(fp,"%d\t",3 - cds_phase); /* 8: phase */ | |
2457 | } else { | |
2458 | /* This appears to be the specification: a phase of 0 indicates | |
2459 | that the next codon begins at the first base of the region | |
2460 | described by the current line, a phase of 1 indicates that the | |
2461 | next codon begins at the second base of this region, and a | |
2462 | phase of 2 indicates that the codon begins at the third base of | |
2463 | this region. */ | |
2464 | FPRINTF(fp,"%d\t",cds_phase); /* 8: phase */ | |
2465 | } | |
2452 | 2466 | |
2453 | 2467 | /* 9: features */ |
2454 | 2468 | FPRINTF(fp,"ID=%s.mrna%d.cds%d;",accession,pathnum,cdsno); |
0 | /* $Id: pair.h 193230 2016-06-30 22:32:37Z twu $ */ | |
0 | /* $Id: pair.h 196403 2016-08-16 14:33:56Z twu $ */ | |
1 | 1 | #ifndef PAIR_INCLUDED |
2 | 2 | #define PAIR_INCLUDED |
3 | 3 | |
31 | 31 | Pair_setup (int trim_mismatch_score_in, int trim_indel_score_in, |
32 | 32 | bool gff3_separators_p_in, bool sam_insert_0M_p_in, bool force_xs_direction_p_in, |
33 | 33 | bool md_lowercase_variant_p_in, bool snps_p_in, bool print_nsnpdiffs_p_in, |
34 | Univcoord_T genomelength_in); | |
34 | Univcoord_T genomelength_in, bool gff3_phase_swap_p_in); | |
35 | 35 | extern int |
36 | 36 | Pair_querypos (T this); |
37 | 37 | extern Chrpos_T |
0 | static char rcsid[] = "$Id: pairpool.c 195763 2016-08-04 01:37:20Z twu $"; | |
0 | static char rcsid[] = "$Id: pairpool.c 195548 2016-08-02 17:18:50Z twu $"; | |
1 | 1 | #ifdef HAVE_CONFIG_H |
2 | 2 | #include <config.h> |
3 | 3 | #endif |
0 | static char rcsid[] = "$Id: samprint.c 195961 2016-08-08 16:36:34Z twu $"; | |
0 | static char rcsid[] = "$Id: samprint.c 196273 2016-08-12 15:15:06Z twu $"; | |
1 | 1 | #ifdef HAVE_CONFIG_H |
2 | 2 | #include <config.h> |
3 | 3 | #endif |
2261 | 2261 | |
2262 | 2262 | |
2263 | 2263 | if (sensep == true) { |
2264 | assert(Substring_chimera_pos(donor) == Substring_queryend(donor)); | |
2264 | assert(Substring_siteD_pos(donor) == Substring_queryend(donor)); | |
2265 | 2265 | if (plusp == true) { |
2266 | 2266 | /* sensep true, plusp true */ |
2267 | 2267 | /* FPRINTF(fp,"donor sensep true, plusp true\n"); */ |
2320 | 2320 | } |
2321 | 2321 | |
2322 | 2322 | } else { |
2323 | assert(Substring_chimera_pos(donor) == Substring_querystart(donor)); | |
2323 | assert(Substring_siteD_pos(donor) == Substring_querystart(donor)); | |
2324 | 2324 | if (plusp == true) { |
2325 | 2325 | /* sensep false, plusp true */ |
2326 | 2326 | /* FPRINTF(fp,"donor sensep false, plusp true\n"); */ |
2668 | 2668 | /* 12. TAGS: XT */ |
2669 | 2669 | if (print_xt_p == true) { |
2670 | 2670 | FPRINTF(fp,"\tXT:Z:%c%c-%c%c,%.2f,%.2f",donor1,donor2,acceptor2,acceptor1,donor_prob,acceptor_prob); |
2671 | FPRINTF(fp,",%c%s@%u..%c%s@%u",donor_strand,donor_chr,Substring_chr_splicecoord(donor), | |
2672 | acceptor_strand,acceptor_chr,Substring_chr_splicecoord(acceptor)); | |
2671 | FPRINTF(fp,",%c%s@%u..%c%s@%u",donor_strand,donor_chr,Substring_chr_splicecoord_D(donor), | |
2672 | acceptor_strand,acceptor_chr,Substring_chr_splicecoord_A(acceptor)); | |
2673 | 2673 | } |
2674 | 2674 | |
2675 | 2675 | /* 12. TAGS: XC */ |
2775 | 2775 | |
2776 | 2776 | |
2777 | 2777 | if (sensep == true) { |
2778 | assert(Substring_chimera_pos(acceptor) == Substring_querystart(acceptor)); | |
2778 | assert(Substring_siteA_pos(acceptor) == Substring_querystart(acceptor)); | |
2779 | 2779 | if (plusp == true) { |
2780 | 2780 | /* sensep true, plusp true */ |
2781 | 2781 | /* FPRINTF(fp,"acceptor sensep true, plusp true\n"); */ |
2829 | 2829 | |
2830 | 2830 | } else { |
2831 | 2831 | /* sensep false, plusp true */ |
2832 | assert(Substring_chimera_pos(acceptor) == Substring_queryend(acceptor)); | |
2832 | assert(Substring_siteA_pos(acceptor) == Substring_queryend(acceptor)); | |
2833 | 2833 | if (plusp == true) { |
2834 | 2834 | /* FPRINTF(fp,"acceptor sensep false, plusp true\n"); */ |
2835 | 2835 | if (hide_soft_clips_p == true) { |
3173 | 3173 | /* 12. TAGS: XT */ |
3174 | 3174 | if (print_xt_p == true) { |
3175 | 3175 | FPRINTF(fp,"\tXT:Z:%c%c-%c%c,%.2f,%.2f",donor1,donor2,acceptor2,acceptor1,donor_prob,acceptor_prob); |
3176 | FPRINTF(fp,",%c%s@%u..%c%s@%u",donor_strand,donor_chr,Substring_chr_splicecoord(donor), | |
3177 | acceptor_strand,acceptor_chr,Substring_chr_splicecoord(acceptor)); | |
3176 | FPRINTF(fp,",%c%s@%u..%c%s@%u",donor_strand,donor_chr,Substring_chr_splicecoord_D(donor), | |
3177 | acceptor_strand,acceptor_chr,Substring_chr_splicecoord_A(acceptor)); | |
3178 | 3178 | } |
3179 | 3179 | |
3180 | 3180 | |
3231 | 3231 | halfacceptor_dinucleotide(&acceptor2,&acceptor1,acceptor,sensedir); |
3232 | 3232 | donor_chr = Univ_IIT_label(chromosome_iit,Substring_chrnum(donor),&allocp); |
3233 | 3233 | acceptor_chr = Univ_IIT_label(chromosome_iit,Substring_chrnum(acceptor),&allocp); |
3234 | donor_prob = Substring_chimera_prob(donor); | |
3235 | acceptor_prob = Substring_chimera_prob(acceptor); | |
3234 | donor_prob = Substring_siteD_prob(donor); | |
3235 | acceptor_prob = Substring_siteA_prob(acceptor); | |
3236 | 3236 | |
3237 | 3237 | /* Code taken from that for XS tag for print_halfdonor and print_halfacceptor */ |
3238 | 3238 | /* For the donor and acceptor strands, use the substring sensedir and not the Stage3end_T sensedir */ |
0 | static char rcsid[] = "$Id: sarray-read.c 195763 2016-08-04 01:37:20Z twu $"; | |
0 | static char rcsid[] = "$Id: sarray-read.c 196431 2016-08-16 20:19:22Z twu $"; | |
1 | 1 | #ifdef HAVE_CONFIG_H |
2 | 2 | #include <config.h> |
3 | 3 | #endif |
2348 | 2348 | this->all_positions = (Univcoord_T *) NULL; |
2349 | 2349 | |
2350 | 2350 | } else { |
2351 | /* Function surrounded by HAVE_ALLOCA */ | |
2351 | 2352 | #ifdef USE_QSORT |
2352 | 2353 | positions_temp = out = (Univcoord_T *) MALLOCA((this->finalptr - this->initptr + 1) * sizeof(Univcoord_T)); |
2353 | 2354 | #else |
2502 | 2503 | #endif |
2503 | 2504 | } |
2504 | 2505 | |
2506 | /* Function surrounded by HAVE_ALLOCA */ | |
2505 | 2507 | FREEA(positions_temp); |
2506 | 2508 | } |
2507 | 2509 | |
2598 | 2600 | this->all_positions = (Univcoord_T *) NULL; |
2599 | 2601 | |
2600 | 2602 | } else { |
2603 | /* Function surrounded by HAVE_ALLOCA */ | |
2601 | 2604 | positions_temp = out = (Univcoord_T *) MALLOCA((this->finalptr - this->initptr + 1) * sizeof(Univcoord_T)); |
2602 | 2605 | |
2603 | 2606 | low_adj = low + this->querystart; |
2750 | 2753 | #endif |
2751 | 2754 | } |
2752 | 2755 | |
2756 | /* Function surrounded by HAVE_ALLOCA */ | |
2753 | 2757 | FREEA(positions_temp); |
2754 | 2758 | } |
2755 | 2759 | |
2790 | 2794 | this->all_positions = (Univcoord_T *) NULL; |
2791 | 2795 | |
2792 | 2796 | } else { |
2797 | /* Function surrounded by HAVE_ALLOCA */ | |
2793 | 2798 | #ifdef USE_QSORT |
2794 | 2799 | positions_temp = (Univcoord_T *) MALLOCA((this->finalptr - this->initptr + 1) * sizeof(Univcoord_T)); |
2795 | 2800 | #else |
2970 | 2975 | #endif |
2971 | 2976 | } |
2972 | 2977 | |
2978 | /* Function surrounded by HAVE_ALLOCA */ | |
2973 | 2979 | FREEA(positions_temp); |
2974 | 2980 | } |
2975 | 2981 | |
3651 | 3657 | int k, j, i, n; |
3652 | 3658 | bool segmenti_usedp, segmentj_usedp; |
3653 | 3659 | bool foundp; |
3654 | ||
3655 | #ifdef HAVE_ALLOCA | |
3656 | int *segmenti_donor_knownpos = (int *) ALLOCA((querylength+1)*sizeof(int)); | |
3657 | int *segmentj_acceptor_knownpos = (int *) ALLOCA((querylength+1)*sizeof(int)); | |
3658 | int *segmentj_antidonor_knownpos = (int *) ALLOCA((querylength+1)*sizeof(int)); | |
3659 | int *segmenti_antiacceptor_knownpos = (int *) ALLOCA((querylength+1)*sizeof(int)); | |
3660 | int *segmenti_donor_knowni = (int *) ALLOCA((querylength+1)*sizeof(int)); | |
3661 | int *segmentj_acceptor_knowni = (int *) ALLOCA((querylength+1)*sizeof(int)); | |
3662 | int *segmentj_antidonor_knowni = (int *) ALLOCA((querylength+1)*sizeof(int)); | |
3663 | int *segmenti_antiacceptor_knowni = (int *) ALLOCA((querylength+1)*sizeof(int)); | |
3664 | #else | |
3665 | int segmenti_donor_knownpos[MAX_READLENGTH+1], segmentj_acceptor_knownpos[MAX_READLENGTH+1], | |
3666 | segmentj_antidonor_knownpos[MAX_READLENGTH+1], segmenti_antiacceptor_knownpos[MAX_READLENGTH+1]; | |
3667 | int segmenti_donor_knowni[MAX_READLENGTH+1], segmentj_acceptor_knowni[MAX_READLENGTH+1], | |
3668 | segmentj_antidonor_knowni[MAX_READLENGTH+1], segmenti_antiacceptor_knowni[MAX_READLENGTH+1]; | |
3669 | #endif | |
3660 | int *segmenti_donor_knownpos, *segmentj_acceptor_knownpos, *segmentj_antidonor_knownpos, *segmenti_antiacceptor_knownpos, | |
3661 | *segmenti_donor_knowni, *segmentj_acceptor_knowni, *segmentj_antidonor_knowni, *segmenti_antiacceptor_knowni; | |
3670 | 3662 | |
3671 | 3663 | |
3672 | 3664 | /* Potential success */ |
3679 | 3671 | return false; |
3680 | 3672 | } else { |
3681 | 3673 | left = goal /* - querylength */; |
3682 | } | |
3674 | ||
3675 | #ifdef HAVE_ALLOCA | |
3676 | if (querylength <= MAX_STACK_READLENGTH) { | |
3677 | segmenti_donor_knownpos = (int *) ALLOCA((querylength+1)*sizeof(int)); | |
3678 | segmentj_acceptor_knownpos = (int *) ALLOCA((querylength+1)*sizeof(int)); | |
3679 | segmentj_antidonor_knownpos = (int *) ALLOCA((querylength+1)*sizeof(int)); | |
3680 | segmenti_antiacceptor_knownpos = (int *) ALLOCA((querylength+1)*sizeof(int)); | |
3681 | segmenti_donor_knowni = (int *) ALLOCA((querylength+1)*sizeof(int)); | |
3682 | segmentj_acceptor_knowni = (int *) ALLOCA((querylength+1)*sizeof(int)); | |
3683 | segmentj_antidonor_knowni = (int *) ALLOCA((querylength+1)*sizeof(int)); | |
3684 | segmenti_antiacceptor_knowni = (int *) ALLOCA((querylength+1)*sizeof(int)); | |
3685 | } else { | |
3686 | segmenti_donor_knownpos = (int *) MALLOC((querylength+1)*sizeof(int)); | |
3687 | segmentj_acceptor_knownpos = (int *) MALLOC((querylength+1)*sizeof(int)); | |
3688 | segmentj_antidonor_knownpos = (int *) MALLOC((querylength+1)*sizeof(int)); | |
3689 | segmenti_antiacceptor_knownpos = (int *) MALLOC((querylength+1)*sizeof(int)); | |
3690 | segmenti_donor_knowni = (int *) MALLOC((querylength+1)*sizeof(int)); | |
3691 | segmentj_acceptor_knowni = (int *) MALLOC((querylength+1)*sizeof(int)); | |
3692 | segmentj_antidonor_knowni = (int *) MALLOC((querylength+1)*sizeof(int)); | |
3693 | segmenti_antiacceptor_knowni = (int *) MALLOC((querylength+1)*sizeof(int)); | |
3694 | } | |
3695 | #else | |
3696 | segmenti_donor_knownpos = (int *) MALLOC((querylength+1)*sizeof(int)); | |
3697 | segmentj_acceptor_knownpos = (int *) MALLOC((querylength+1)*sizeof(int)); | |
3698 | segmentj_antidonor_knownpos = (int *) MALLOC((querylength+1)*sizeof(int)); | |
3699 | segmenti_antiacceptor_knownpos = (int *) MALLOC((querylength+1)*sizeof(int)); | |
3700 | segmenti_donor_knowni = (int *) MALLOC((querylength+1)*sizeof(int)); | |
3701 | segmentj_acceptor_knowni = (int *) MALLOC((querylength+1)*sizeof(int)); | |
3702 | segmentj_antidonor_knowni = (int *) MALLOC((querylength+1)*sizeof(int)); | |
3703 | segmenti_antiacceptor_knowni = (int *) MALLOC((querylength+1)*sizeof(int)); | |
3704 | #endif | |
3705 | } | |
3706 | ||
3683 | 3707 | |
3684 | 3708 | nsame = ndiff = 0; |
3685 | 3709 | querystart_diff = querylength; |
3792 | 3816 | debug7(printf("same is at %u from %d to %d\n",left,querystart_same,queryend_same)); |
3793 | 3817 | |
3794 | 3818 | n = Uintlist_length(difflist); |
3819 | #ifdef HAVE_ALLOCA | |
3795 | 3820 | #ifdef USE_QSORT |
3796 | 3821 | array = (UINT4 *) MALLOCA(n * sizeof(UINT4)); |
3797 | 3822 | #else |
3798 | 3823 | array = (UINT4 *) MALLOCA((n + 1) * sizeof(UINT4)); |
3799 | 3824 | #endif |
3825 | #else | |
3826 | #ifdef USE_QSORT | |
3827 | array = (UINT4 *) MALLOC(n * sizeof(UINT4)); | |
3828 | #else | |
3829 | array = (UINT4 *) MALLOC((n + 1) * sizeof(UINT4)); | |
3830 | #endif | |
3831 | #endif | |
3832 | ||
3800 | 3833 | Uintlist_fill_array_and_free(array,&difflist); |
3801 | 3834 | #ifdef USE_QSORT |
3802 | 3835 | qsort(array,n,sizeof(Univcoord_T),Univcoord_compare); |
4000 | 4033 | Substring_genomicstart(Stage3end_substring_donor(hit)),Substring_genomicend(Stage3end_substring_donor(hit)), |
4001 | 4034 | Substring_match_length_orig(Stage3end_substring_acceptor(hit)), |
4002 | 4035 | Substring_genomicstart(Stage3end_substring_acceptor(hit)),Substring_genomicend(Stage3end_substring_acceptor(hit)), |
4003 | Stage3end_nmismatches_whole(hit),Substring_chimera_prob(Stage3end_substring_donor(hit)), | |
4004 | Substring_chimera_prob(Stage3end_substring_acceptor(hit)))); | |
4036 | Stage3end_nmismatches_whole(hit),Substring_siteD_prob(Stage3end_substring_donor(hit)), | |
4037 | Substring_siteA_prob(Stage3end_substring_acceptor(hit)))); | |
4005 | 4038 | if ((nmismatches = Stage3end_nmismatches_whole(hit)) < best_nmismatches) { |
4006 | 4039 | best_nmismatches = nmismatches; |
4007 | 4040 | } |
4017 | 4050 | if (Stage3end_nmismatches_whole(hit) <= best_nmismatches + LOCALSPLICING_NMATCHES_SLOP && |
4018 | 4051 | Stage3end_chimera_prob(hit) >= best_prob - LOCALSPLICING_PROB_SLOP) { |
4019 | 4052 | debug7(printf("accepting distance %d, probabilities %f and %f\n", |
4020 | Stage3end_distance(hit),Substring_chimera_prob(Stage3end_substring_donor(hit)), | |
4021 | Substring_chimera_prob(Stage3end_substring_acceptor(hit)))); | |
4053 | Stage3end_distance(hit),Substring_siteD_prob(Stage3end_substring_donor(hit)), | |
4054 | Substring_siteA_prob(Stage3end_substring_acceptor(hit)))); | |
4022 | 4055 | n_good_spliceends += 1; |
4023 | 4056 | accepted_hits = List_push(accepted_hits,(void *) hit); |
4024 | 4057 | } else { |
4034 | 4067 | if (Stage3end_nmismatches_whole(hit) <= best_nmismatches + LOCALSPLICING_NMATCHES_SLOP || |
4035 | 4068 | Stage3end_chimera_prob(hit) >= best_prob - LOCALSPLICING_PROB_SLOP) { |
4036 | 4069 | debug7(printf("accepting distance %d, probabilities %f and %f\n", |
4037 | Stage3end_distance(hit),Substring_chimera_prob(Stage3end_substring_donor(hit)), | |
4038 | Substring_chimera_prob(Stage3end_substring_acceptor(hit)))); | |
4070 | Stage3end_distance(hit),Substring_siteD_prob(Stage3end_substring_donor(hit)), | |
4071 | Substring_siteA_prob(Stage3end_substring_acceptor(hit)))); | |
4039 | 4072 | n_good_spliceends += 1; |
4040 | 4073 | accepted_hits = List_push(accepted_hits,(void *) hit); |
4041 | 4074 | } else { |
4111 | 4144 | for (k = i; k < j; k++) { |
4112 | 4145 | acceptor = Stage3end_substring_acceptor(hitarray[k]); |
4113 | 4146 | #ifdef LARGE_GENOMES |
4114 | ambcoords = Uint8list_push(ambcoords,Substring_splicecoord(acceptor)); | |
4115 | #else | |
4116 | ambcoords = Uintlist_push(ambcoords,Substring_splicecoord(acceptor)); | |
4147 | ambcoords = Uint8list_push(ambcoords,Substring_splicecoord_A(acceptor)); | |
4148 | #else | |
4149 | ambcoords = Uintlist_push(ambcoords,Substring_splicecoord_A(acceptor)); | |
4117 | 4150 | #endif |
4118 | 4151 | amb_knowni = Intlist_push(amb_knowni,-1); |
4119 | 4152 | amb_nmismatches = Intlist_push(amb_nmismatches,Substring_nmismatches_whole(acceptor)); |
4120 | amb_probs = Doublelist_push(amb_probs,Substring_chimera_prob(acceptor)); | |
4153 | amb_probs = Doublelist_push(amb_probs,Substring_siteA_prob(acceptor)); | |
4121 | 4154 | } |
4122 | 4155 | |
4123 | 4156 | nmismatches_acceptor = best_nmismatches - Substring_nmismatches_whole(donor); |
4124 | prob = best_prob - Substring_chimera_prob(donor); | |
4157 | prob = best_prob - Substring_siteD_prob(donor); | |
4125 | 4158 | *ambiguous = List_push(*ambiguous, |
4126 | 4159 | (void *) Stage3end_new_splice(&(*found_score), |
4127 | 4160 | /*nmismatches_donor*/Substring_nmismatches_whole(donor),nmismatches_acceptor, |
4174 | 4207 | for (k = i; k < j; k++) { |
4175 | 4208 | donor = Stage3end_substring_donor(hitarray[k]); |
4176 | 4209 | #ifdef LARGE_GENOMES |
4177 | ambcoords = Uint8list_push(ambcoords,Substring_splicecoord(donor)); | |
4178 | #else | |
4179 | ambcoords = Uintlist_push(ambcoords,Substring_splicecoord(donor)); | |
4210 | ambcoords = Uint8list_push(ambcoords,Substring_splicecoord_D(donor)); | |
4211 | #else | |
4212 | ambcoords = Uintlist_push(ambcoords,Substring_splicecoord_D(donor)); | |
4180 | 4213 | #endif |
4181 | 4214 | amb_knowni = Intlist_push(amb_knowni,-1); |
4182 | 4215 | amb_nmismatches = Intlist_push(amb_nmismatches,Substring_nmismatches_whole(donor)); |
4183 | amb_probs = Doublelist_push(amb_probs,Substring_chimera_prob(donor)); | |
4216 | amb_probs = Doublelist_push(amb_probs,Substring_siteD_prob(donor)); | |
4184 | 4217 | } |
4185 | 4218 | |
4186 | 4219 | nmismatches_donor = best_nmismatches - Substring_nmismatches_whole(acceptor); |
4187 | prob = best_prob - Substring_chimera_prob(acceptor); | |
4220 | prob = best_prob - Substring_siteA_prob(acceptor); | |
4188 | 4221 | *ambiguous = List_push(*ambiguous, |
4189 | 4222 | (void *) Stage3end_new_splice(&(*found_score), |
4190 | 4223 | nmismatches_donor,/*nmismatches_acceptor*/Substring_nmismatches_whole(acceptor), |
4229 | 4262 | Substring_genomicstart(Stage3end_substring_donor(hit)),Substring_genomicend(Stage3end_substring_donor(hit)), |
4230 | 4263 | Substring_match_length_orig(Stage3end_substring_acceptor(hit)), |
4231 | 4264 | Substring_genomicstart(Stage3end_substring_acceptor(hit)),Substring_genomicend(Stage3end_substring_acceptor(hit)), |
4232 | Stage3end_nmismatches_whole(hit),Substring_chimera_prob(Stage3end_substring_donor(hit)), | |
4233 | Substring_chimera_prob(Stage3end_substring_acceptor(hit)))); | |
4265 | Stage3end_nmismatches_whole(hit),Substring_siteD_prob(Stage3end_substring_donor(hit)), | |
4266 | Substring_siteA_prob(Stage3end_substring_acceptor(hit)))); | |
4234 | 4267 | if ((nmismatches = Stage3end_nmismatches_whole(hit)) < best_nmismatches) { |
4235 | 4268 | best_nmismatches = nmismatches; |
4236 | 4269 | } |
4248 | 4281 | debug7(printf("accepting distance %d, donor length %d and acceptor length %d, probabilities %f and %f\n", |
4249 | 4282 | Stage3end_distance(hit),Substring_match_length_orig(Stage3end_substring_donor(hit)), |
4250 | 4283 | Substring_match_length_orig(Stage3end_substring_acceptor(hit)), |
4251 | Substring_chimera_prob(Stage3end_substring_donor(hit)), | |
4252 | Substring_chimera_prob(Stage3end_substring_acceptor(hit)))); | |
4284 | Substring_siteD_prob(Stage3end_substring_donor(hit)), | |
4285 | Substring_siteA_prob(Stage3end_substring_acceptor(hit)))); | |
4253 | 4286 | n_good_spliceends += 1; |
4254 | 4287 | accepted_hits = List_push(accepted_hits,(void *) hit); |
4255 | 4288 | } else { |
4267 | 4300 | debug7(printf("accepting distance %d, donor length %d and acceptor length %d, probabilities %f and %f\n", |
4268 | 4301 | Stage3end_distance(hit),Substring_match_length_orig(Stage3end_substring_donor(hit)), |
4269 | 4302 | Substring_match_length_orig(Stage3end_substring_acceptor(hit)), |
4270 | Substring_chimera_prob(Stage3end_substring_donor(hit)), | |
4271 | Substring_chimera_prob(Stage3end_substring_acceptor(hit)))); | |
4303 | Substring_siteD_prob(Stage3end_substring_donor(hit)), | |
4304 | Substring_siteA_prob(Stage3end_substring_acceptor(hit)))); | |
4272 | 4305 | n_good_spliceends += 1; |
4273 | 4306 | accepted_hits = List_push(accepted_hits,(void *) hit); |
4274 | 4307 | } else { |
4344 | 4377 | for (k = i; k < j; k++) { |
4345 | 4378 | acceptor = Stage3end_substring_acceptor(hitarray[k]); |
4346 | 4379 | #ifdef LARGE_GENOMES |
4347 | ambcoords = Uint8list_push(ambcoords,Substring_splicecoord(acceptor)); | |
4348 | #else | |
4349 | ambcoords = Uintlist_push(ambcoords,Substring_splicecoord(acceptor)); | |
4380 | ambcoords = Uint8list_push(ambcoords,Substring_splicecoord_A(acceptor)); | |
4381 | #else | |
4382 | ambcoords = Uintlist_push(ambcoords,Substring_splicecoord_A(acceptor)); | |
4350 | 4383 | #endif |
4351 | 4384 | amb_knowni = Intlist_push(amb_knowni,-1); |
4352 | 4385 | amb_nmismatches = Intlist_push(amb_nmismatches,Substring_nmismatches_whole(acceptor)); |
4353 | amb_probs = Doublelist_push(amb_probs,Substring_chimera_prob(acceptor)); | |
4386 | amb_probs = Doublelist_push(amb_probs,Substring_siteA_prob(acceptor)); | |
4354 | 4387 | } |
4355 | 4388 | |
4356 | 4389 | nmismatches_acceptor = best_nmismatches - Substring_nmismatches_whole(donor); |
4357 | prob = best_prob - Substring_chimera_prob(donor); | |
4390 | prob = best_prob - Substring_siteD_prob(donor); | |
4358 | 4391 | *ambiguous = List_push(*ambiguous, |
4359 | 4392 | (void *) Stage3end_new_splice(&(*found_score), |
4360 | 4393 | /*nmismatches_donor*/Substring_nmismatches_whole(donor),nmismatches_acceptor, |
4407 | 4440 | for (k = i; k < j; k++) { |
4408 | 4441 | donor = Stage3end_substring_donor(hitarray[k]); |
4409 | 4442 | #ifdef LARGE_GENOMES |
4410 | ambcoords = Uint8list_push(ambcoords,Substring_splicecoord(donor)); | |
4411 | #else | |
4412 | ambcoords = Uintlist_push(ambcoords,Substring_splicecoord(donor)); | |
4443 | ambcoords = Uint8list_push(ambcoords,Substring_splicecoord_D(donor)); | |
4444 | #else | |
4445 | ambcoords = Uintlist_push(ambcoords,Substring_splicecoord_D(donor)); | |
4413 | 4446 | #endif |
4414 | 4447 | amb_knowni = Intlist_push(amb_knowni,-1); |
4415 | 4448 | amb_nmismatches = Intlist_push(amb_nmismatches,Substring_nmismatches_whole(donor)); |
4416 | amb_probs = Doublelist_push(amb_probs,Substring_chimera_prob(donor)); | |
4449 | amb_probs = Doublelist_push(amb_probs,Substring_siteD_prob(donor)); | |
4417 | 4450 | } |
4418 | 4451 | |
4419 | 4452 | nmismatches_donor = best_nmismatches - Substring_nmismatches_whole(acceptor); |
4420 | prob = best_prob - Substring_chimera_prob(acceptor); | |
4453 | prob = best_prob - Substring_siteA_prob(acceptor); | |
4421 | 4454 | *ambiguous = List_push(*ambiguous, |
4422 | 4455 | (void *) Stage3end_new_splice(&(*found_score), |
4423 | 4456 | nmismatches_donor,/*nmismatches_acceptor*/Substring_nmismatches_whole(acceptor), |
4459 | 4492 | } |
4460 | 4493 | List_free(&lowprob); |
4461 | 4494 | |
4495 | #ifdef HAVE_ALLOCA | |
4462 | 4496 | FREEA(array); |
4497 | #else | |
4498 | FREE(array); | |
4499 | #endif | |
4463 | 4500 | |
4464 | 4501 | } else if (querystart_diff == 0 && queryend_same == querylength - 1) { |
4465 | 4502 | left2 = left; |
4467 | 4504 | debug7(printf("same is at %u from %d to %d\n",left,querystart_same,queryend_same)); |
4468 | 4505 | |
4469 | 4506 | n = Uintlist_length(difflist); |
4507 | #ifdef HAVE_ALLOCA | |
4470 | 4508 | #ifdef USE_QSORT |
4471 | 4509 | array = (UINT4 *) MALLOCA(n * sizeof(UINT4)); |
4472 | 4510 | #else |
4473 | 4511 | array = (UINT4 *) MALLOCA((n + 1) * sizeof(UINT4)); |
4474 | 4512 | #endif |
4513 | #else | |
4514 | #ifdef USE_QSORT | |
4515 | array = (UINT4 *) MALLOC(n * sizeof(UINT4)); | |
4516 | #else | |
4517 | array = (UINT4 *) MALLOC((n + 1) * sizeof(UINT4)); | |
4518 | #endif | |
4519 | #endif | |
4520 | ||
4475 | 4521 | Uintlist_fill_array_and_free(array,&difflist); |
4476 | 4522 | #ifdef USE_QSORT |
4477 | 4523 | qsort(array,n,sizeof(Univcoord_T),Univcoord_compare); |
4664 | 4710 | Substring_genomicstart(Stage3end_substring_donor(hit)),Substring_genomicend(Stage3end_substring_donor(hit)), |
4665 | 4711 | Substring_match_length_orig(Stage3end_substring_acceptor(hit)), |
4666 | 4712 | Substring_genomicstart(Stage3end_substring_acceptor(hit)),Substring_genomicend(Stage3end_substring_acceptor(hit)), |
4667 | Stage3end_nmismatches_whole(hit),Substring_chimera_prob(Stage3end_substring_donor(hit)), | |
4668 | Substring_chimera_prob(Stage3end_substring_acceptor(hit)))); | |
4713 | Stage3end_nmismatches_whole(hit),Substring_siteD_prob(Stage3end_substring_donor(hit)), | |
4714 | Substring_siteA_prob(Stage3end_substring_acceptor(hit)))); | |
4669 | 4715 | if ((nmismatches = Stage3end_nmismatches_whole(hit)) < best_nmismatches) { |
4670 | 4716 | best_nmismatches = nmismatches; |
4671 | 4717 | } |
4681 | 4727 | if (Stage3end_nmismatches_whole(hit) <= best_nmismatches + LOCALSPLICING_NMATCHES_SLOP && |
4682 | 4728 | Stage3end_chimera_prob(hit) >= best_prob - LOCALSPLICING_PROB_SLOP) { |
4683 | 4729 | debug7(printf("accepting distance %d, probabilities %f and %f\n", |
4684 | Stage3end_distance(hit),Substring_chimera_prob(Stage3end_substring_donor(hit)), | |
4685 | Substring_chimera_prob(Stage3end_substring_acceptor(hit)))); | |
4730 | Stage3end_distance(hit),Substring_siteD_prob(Stage3end_substring_donor(hit)), | |
4731 | Substring_siteA_prob(Stage3end_substring_acceptor(hit)))); | |
4686 | 4732 | n_good_spliceends += 1; |
4687 | 4733 | accepted_hits = List_push(accepted_hits,(void *) hit); |
4688 | 4734 | } else { |
4700 | 4746 | debug7(printf("accepting distance %d, donor length %d and acceptor length %d, probabilities %f and %f\n", |
4701 | 4747 | Stage3end_distance(hit),Substring_match_length_orig(Stage3end_substring_donor(hit)), |
4702 | 4748 | Substring_match_length_orig(Stage3end_substring_acceptor(hit)), |
4703 | Substring_chimera_prob(Stage3end_substring_donor(hit)), | |
4704 | Substring_chimera_prob(Stage3end_substring_acceptor(hit)))); | |
4749 | Substring_siteD_prob(Stage3end_substring_donor(hit)), | |
4750 | Substring_siteA_prob(Stage3end_substring_acceptor(hit)))); | |
4705 | 4751 | n_good_spliceends += 1; |
4706 | 4752 | accepted_hits = List_push(accepted_hits,(void *) hit); |
4707 | 4753 | } else { |
4777 | 4823 | for (k = i; k < j; k++) { |
4778 | 4824 | acceptor = Stage3end_substring_acceptor(hitarray[k]); |
4779 | 4825 | #ifdef LARGE_GENOMES |
4780 | ambcoords = Uint8list_push(ambcoords,Substring_splicecoord(acceptor)); | |
4781 | #else | |
4782 | ambcoords = Uintlist_push(ambcoords,Substring_splicecoord(acceptor)); | |
4826 | ambcoords = Uint8list_push(ambcoords,Substring_splicecoord_A(acceptor)); | |
4827 | #else | |
4828 | ambcoords = Uintlist_push(ambcoords,Substring_splicecoord_A(acceptor)); | |
4783 | 4829 | #endif |
4784 | 4830 | amb_knowni = Intlist_push(amb_knowni,-1); |
4785 | 4831 | amb_nmismatches = Intlist_push(amb_nmismatches,Substring_nmismatches_whole(acceptor)); |
4786 | amb_probs = Doublelist_push(amb_probs,Substring_chimera_prob(acceptor)); | |
4832 | amb_probs = Doublelist_push(amb_probs,Substring_siteA_prob(acceptor)); | |
4787 | 4833 | } |
4788 | 4834 | |
4789 | 4835 | nmismatches_acceptor = best_nmismatches - Substring_nmismatches_whole(donor); |
4790 | prob = best_prob - Substring_chimera_prob(donor); | |
4836 | prob = best_prob - Substring_siteD_prob(donor); | |
4791 | 4837 | *ambiguous = List_push(*ambiguous, |
4792 | 4838 | (void *) Stage3end_new_splice(&(*found_score), |
4793 | 4839 | /*nmismatches_donor*/Substring_nmismatches_whole(donor),nmismatches_acceptor, |
4841 | 4887 | for (k = i; k < j; k++) { |
4842 | 4888 | donor = Stage3end_substring_donor(hitarray[k]); |
4843 | 4889 | #ifdef LARGE_GENOMES |
4844 | ambcoords = Uint8list_push(ambcoords,Substring_splicecoord(donor)); | |
4845 | #else | |
4846 | ambcoords = Uintlist_push(ambcoords,Substring_splicecoord(donor)); | |
4890 | ambcoords = Uint8list_push(ambcoords,Substring_splicecoord_D(donor)); | |
4891 | #else | |
4892 | ambcoords = Uintlist_push(ambcoords,Substring_splicecoord_D(donor)); | |
4847 | 4893 | #endif |
4848 | 4894 | amb_knowni = Intlist_push(amb_knowni,-1); |
4849 | 4895 | amb_nmismatches = Intlist_push(amb_nmismatches,Substring_nmismatches_whole(donor)); |
4850 | amb_probs = Doublelist_push(amb_probs,Substring_chimera_prob(donor)); | |
4896 | amb_probs = Doublelist_push(amb_probs,Substring_siteD_prob(donor)); | |
4851 | 4897 | } |
4852 | 4898 | |
4853 | 4899 | nmismatches_donor = best_nmismatches - Substring_nmismatches_whole(acceptor); |
4854 | prob = best_prob - Substring_chimera_prob(acceptor); | |
4900 | prob = best_prob - Substring_siteA_prob(acceptor); | |
4855 | 4901 | *ambiguous = List_push(*ambiguous, |
4856 | 4902 | (void *) Stage3end_new_splice(&(*found_score), |
4857 | 4903 | nmismatches_donor,/*nmismatches_acceptor*/Substring_nmismatches_whole(acceptor), |
4896 | 4942 | Substring_genomicstart(Stage3end_substring_donor(hit)),Substring_genomicend(Stage3end_substring_donor(hit)), |
4897 | 4943 | Substring_match_length_orig(Stage3end_substring_acceptor(hit)), |
4898 | 4944 | Substring_genomicstart(Stage3end_substring_acceptor(hit)),Substring_genomicend(Stage3end_substring_acceptor(hit)), |
4899 | Stage3end_nmismatches_whole(hit),Substring_chimera_prob(Stage3end_substring_donor(hit)), | |
4900 | Substring_chimera_prob(Stage3end_substring_acceptor(hit)))); | |
4945 | Stage3end_nmismatches_whole(hit),Substring_siteD_prob(Stage3end_substring_donor(hit)), | |
4946 | Substring_siteA_prob(Stage3end_substring_acceptor(hit)))); | |
4901 | 4947 | if ((nmismatches = Stage3end_nmismatches_whole(hit)) < best_nmismatches) { |
4902 | 4948 | best_nmismatches = nmismatches; |
4903 | 4949 | } |
4913 | 4959 | if (Stage3end_nmismatches_whole(hit) <= best_nmismatches + LOCALSPLICING_NMATCHES_SLOP && |
4914 | 4960 | Stage3end_chimera_prob(hit) >= best_prob - LOCALSPLICING_PROB_SLOP) { |
4915 | 4961 | debug7(printf("accepting distance %d, probabilities %f and %f\n", |
4916 | Stage3end_distance(hit),Substring_chimera_prob(Stage3end_substring_donor(hit)), | |
4917 | Substring_chimera_prob(Stage3end_substring_acceptor(hit)))); | |
4962 | Stage3end_distance(hit),Substring_siteD_prob(Stage3end_substring_donor(hit)), | |
4963 | Substring_siteA_prob(Stage3end_substring_acceptor(hit)))); | |
4918 | 4964 | n_good_spliceends += 1; |
4919 | 4965 | accepted_hits = List_push(accepted_hits,(void *) hit); |
4920 | 4966 | } else { |
4932 | 4978 | debug7(printf("accepting distance %d, donor length %d and acceptor length %d, probabilities %f and %f\n", |
4933 | 4979 | Stage3end_distance(hit),Substring_match_length_orig(Stage3end_substring_donor(hit)), |
4934 | 4980 | Substring_match_length_orig(Stage3end_substring_acceptor(hit)), |
4935 | Substring_chimera_prob(Stage3end_substring_donor(hit)), | |
4936 | Substring_chimera_prob(Stage3end_substring_acceptor(hit)))); | |
4981 | Substring_siteD_prob(Stage3end_substring_donor(hit)), | |
4982 | Substring_siteA_prob(Stage3end_substring_acceptor(hit)))); | |
4937 | 4983 | n_good_spliceends += 1; |
4938 | 4984 | accepted_hits = List_push(accepted_hits,(void *) hit); |
4939 | 4985 | } else { |
5009 | 5055 | for (k = i; k < j; k++) { |
5010 | 5056 | acceptor = Stage3end_substring_acceptor(hitarray[k]); |
5011 | 5057 | #ifdef LARGE_GENOMES |
5012 | ambcoords = Uint8list_push(ambcoords,Substring_splicecoord(acceptor)); | |
5013 | #else | |
5014 | ambcoords = Uintlist_push(ambcoords,Substring_splicecoord(acceptor)); | |
5058 | ambcoords = Uint8list_push(ambcoords,Substring_splicecoord_A(acceptor)); | |
5059 | #else | |
5060 | ambcoords = Uintlist_push(ambcoords,Substring_splicecoord_A(acceptor)); | |
5015 | 5061 | #endif |
5016 | 5062 | amb_knowni = Intlist_push(amb_knowni,-1); |
5017 | 5063 | amb_nmismatches = Intlist_push(amb_nmismatches,Substring_nmismatches_whole(acceptor)); |
5018 | amb_probs = Doublelist_push(amb_probs,Substring_chimera_prob(acceptor)); | |
5064 | amb_probs = Doublelist_push(amb_probs,Substring_siteA_prob(acceptor)); | |
5019 | 5065 | } |
5020 | 5066 | |
5021 | 5067 | nmismatches_acceptor = best_nmismatches - Substring_nmismatches_whole(donor); |
5022 | prob = best_prob - Substring_chimera_prob(donor); | |
5068 | prob = best_prob - Substring_siteD_prob(donor); | |
5023 | 5069 | *ambiguous = List_push(*ambiguous, |
5024 | 5070 | (void *) Stage3end_new_splice(&(*found_score), |
5025 | 5071 | /*nmismatches_donor*/Substring_nmismatches_whole(donor),nmismatches_acceptor, |
5072 | 5118 | for (k = i; k < j; k++) { |
5073 | 5119 | donor = Stage3end_substring_donor(hitarray[k]); |
5074 | 5120 | #ifdef LARGE_GENOMES |
5075 | ambcoords = Uint8list_push(ambcoords,Substring_splicecoord(donor)); | |
5076 | #else | |
5077 | ambcoords = Uintlist_push(ambcoords,Substring_splicecoord(donor)); | |
5121 | ambcoords = Uint8list_push(ambcoords,Substring_splicecoord_D(donor)); | |
5122 | #else | |
5123 | ambcoords = Uintlist_push(ambcoords,Substring_splicecoord_D(donor)); | |
5078 | 5124 | #endif |
5079 | 5125 | amb_knowni = Intlist_push(amb_knowni,-1); |
5080 | 5126 | amb_nmismatches = Intlist_push(amb_nmismatches,Substring_nmismatches_whole(donor)); |
5081 | amb_probs = Doublelist_push(amb_probs,Substring_chimera_prob(donor)); | |
5127 | amb_probs = Doublelist_push(amb_probs,Substring_siteD_prob(donor)); | |
5082 | 5128 | } |
5083 | 5129 | |
5084 | 5130 | nmismatches_donor = best_nmismatches - Substring_nmismatches_whole(acceptor); |
5085 | prob = best_prob - Substring_chimera_prob(acceptor); | |
5131 | prob = best_prob - Substring_siteA_prob(acceptor); | |
5086 | 5132 | *ambiguous = List_push(*ambiguous, |
5087 | 5133 | (void *) Stage3end_new_splice(&(*found_score), |
5088 | 5134 | nmismatches_donor,/*nmismatches_acceptor*/Substring_nmismatches_whole(acceptor), |
5125 | 5171 | } |
5126 | 5172 | List_free(&lowprob); |
5127 | 5173 | |
5174 | #ifdef HAVE_ALLOCA | |
5128 | 5175 | FREEA(array); |
5176 | #else | |
5177 | FREE(array); | |
5178 | #endif | |
5129 | 5179 | |
5130 | 5180 | } else { |
5131 | 5181 | Uintlist_free(&difflist); |
5132 | 5182 | } |
5183 | ||
5184 | ||
5185 | #ifdef HAVE_ALLOCA | |
5186 | if (querylength <= MAX_STACK_READLENGTH) { | |
5187 | FREEA(segmenti_donor_knownpos); | |
5188 | FREEA(segmentj_acceptor_knownpos); | |
5189 | FREEA(segmentj_antidonor_knownpos); | |
5190 | FREEA(segmenti_antiacceptor_knownpos); | |
5191 | FREEA(segmenti_donor_knowni); | |
5192 | FREEA(segmentj_acceptor_knowni); | |
5193 | FREEA(segmentj_antidonor_knowni); | |
5194 | FREEA(segmenti_antiacceptor_knowni); | |
5195 | } else { | |
5196 | FREE(segmenti_donor_knownpos); | |
5197 | FREE(segmentj_acceptor_knownpos); | |
5198 | FREE(segmentj_antidonor_knownpos); | |
5199 | FREE(segmenti_antiacceptor_knownpos); | |
5200 | FREE(segmenti_donor_knowni); | |
5201 | FREE(segmentj_acceptor_knowni); | |
5202 | FREE(segmentj_antidonor_knowni); | |
5203 | FREE(segmenti_antiacceptor_knowni); | |
5204 | } | |
5205 | #else | |
5206 | FREE(segmenti_donor_knownpos); | |
5207 | FREE(segmentj_acceptor_knownpos); | |
5208 | FREE(segmentj_antidonor_knownpos); | |
5209 | FREE(segmenti_antiacceptor_knownpos); | |
5210 | FREE(segmenti_donor_knowni); | |
5211 | FREE(segmentj_acceptor_knowni); | |
5212 | FREE(segmentj_antidonor_knowni); | |
5213 | FREE(segmenti_antiacceptor_knowni); | |
5214 | #endif | |
5133 | 5215 | |
5134 | 5216 | return twopartp; |
5135 | 5217 | } |
5323 | 5405 | |
5324 | 5406 | #ifdef SUBDIVIDE_NOMATCHES |
5325 | 5407 | /* Try to subdivide elts that have no matches */ |
5408 | #ifdef HAVE_ALLOCA | |
5326 | 5409 | coveredp = (bool *) CALLOCA(querylength,sizeof(bool)); |
5327 | mappings = (Chrpos_T **) MALLOCA(querylength * sizeof(Chrpos_T *)); | |
5410 | mappings = (Chrpos_T **) ALLOCA(querylength * sizeof(Chrpos_T *)); | |
5328 | 5411 | npositions = (int *) CALLOCA(querylength,sizeof(int)); |
5412 | #else | |
5413 | coveredp = (bool *) CALLOC(querylength,sizeof(bool)); | |
5414 | mappings = (Chrpos_T **) MALLOC(querylength * sizeof(Chrpos_T *)); | |
5415 | npositions = (int *) CALLOC(querylength,sizeof(int)); | |
5416 | #endif | |
5329 | 5417 | oligoindex = Oligoindex_array_elt(oligoindices_minor,/*source*/0); |
5330 | 5418 | indexsize = Oligoindex_indexsize(oligoindex); |
5331 | 5419 | |
5935 | 6023 | |
5936 | 6024 | int segmenti_donor_nknown, segmentj_acceptor_nknown, |
5937 | 6025 | segmentj_antidonor_nknown, segmenti_antiacceptor_nknown; |
6026 | int *segmenti_donor_knownpos, *segmentj_acceptor_knownpos, *segmentj_antidonor_knownpos, *segmenti_antiacceptor_knownpos, | |
6027 | *segmenti_donor_knowni, *segmentj_acceptor_knowni, *segmentj_antidonor_knowni, *segmenti_antiacceptor_knowni; | |
6028 | int j; | |
6029 | ||
5938 | 6030 | #ifdef HAVE_ALLOCA |
5939 | int *segmenti_donor_knownpos = (int *) ALLOCA((querylength+1)*sizeof(int)); | |
5940 | int *segmentj_acceptor_knownpos = (int *) ALLOCA((querylength+1)*sizeof(int)); | |
5941 | int *segmentj_antidonor_knownpos = (int *) ALLOCA((querylength+1)*sizeof(int)); | |
5942 | int *segmenti_antiacceptor_knownpos = (int *) ALLOCA((querylength+1)*sizeof(int)); | |
5943 | int *segmenti_donor_knowni = (int *) ALLOCA((querylength+1)*sizeof(int)); | |
5944 | int *segmentj_acceptor_knowni = (int *) ALLOCA((querylength+1)*sizeof(int)); | |
5945 | int *segmentj_antidonor_knowni = (int *) ALLOCA((querylength+1)*sizeof(int)); | |
5946 | int *segmenti_antiacceptor_knowni = (int *) ALLOCA((querylength+1)*sizeof(int)); | |
5947 | #else | |
5948 | int segmenti_donor_knownpos[MAX_READLENGTH+1], segmentj_acceptor_knownpos[MAX_READLENGTH+1], | |
5949 | segmentj_antidonor_knownpos[MAX_READLENGTH+1], segmenti_antiacceptor_knownpos[MAX_READLENGTH+1]; | |
5950 | int segmenti_donor_knowni[MAX_READLENGTH+1], segmentj_acceptor_knowni[MAX_READLENGTH+1], | |
5951 | segmentj_antidonor_knowni[MAX_READLENGTH+1], segmenti_antiacceptor_knowni[MAX_READLENGTH+1]; | |
5952 | #endif | |
5953 | ||
5954 | int j; | |
6031 | if (querylength <= MAX_STACK_READLENGTH) { | |
6032 | segmenti_donor_knownpos = (int *) ALLOCA((querylength+1)*sizeof(int)); | |
6033 | segmentj_acceptor_knownpos = (int *) ALLOCA((querylength+1)*sizeof(int)); | |
6034 | segmentj_antidonor_knownpos = (int *) ALLOCA((querylength+1)*sizeof(int)); | |
6035 | segmenti_antiacceptor_knownpos = (int *) ALLOCA((querylength+1)*sizeof(int)); | |
6036 | segmenti_donor_knowni = (int *) ALLOCA((querylength+1)*sizeof(int)); | |
6037 | segmentj_acceptor_knowni = (int *) ALLOCA((querylength+1)*sizeof(int)); | |
6038 | segmentj_antidonor_knowni = (int *) ALLOCA((querylength+1)*sizeof(int)); | |
6039 | segmenti_antiacceptor_knowni = (int *) ALLOCA((querylength+1)*sizeof(int)); | |
6040 | } else { | |
6041 | segmenti_donor_knownpos = (int *) MALLOC((querylength+1)*sizeof(int)); | |
6042 | segmentj_acceptor_knownpos = (int *) MALLOC((querylength+1)*sizeof(int)); | |
6043 | segmentj_antidonor_knownpos = (int *) MALLOC((querylength+1)*sizeof(int)); | |
6044 | segmenti_antiacceptor_knownpos = (int *) MALLOC((querylength+1)*sizeof(int)); | |
6045 | segmenti_donor_knowni = (int *) MALLOC((querylength+1)*sizeof(int)); | |
6046 | segmentj_acceptor_knowni = (int *) MALLOC((querylength+1)*sizeof(int)); | |
6047 | segmentj_antidonor_knowni = (int *) MALLOC((querylength+1)*sizeof(int)); | |
6048 | segmenti_antiacceptor_knowni = (int *) MALLOC((querylength+1)*sizeof(int)); | |
6049 | } | |
6050 | #else | |
6051 | segmenti_donor_knownpos = (int *) MALLOC((querylength+1)*sizeof(int)); | |
6052 | segmentj_acceptor_knownpos = (int *) MALLOC((querylength+1)*sizeof(int)); | |
6053 | segmentj_antidonor_knownpos = (int *) MALLOC((querylength+1)*sizeof(int)); | |
6054 | segmenti_antiacceptor_knownpos = (int *) MALLOC((querylength+1)*sizeof(int)); | |
6055 | segmenti_donor_knowni = (int *) MALLOC((querylength+1)*sizeof(int)); | |
6056 | segmentj_acceptor_knowni = (int *) MALLOC((querylength+1)*sizeof(int)); | |
6057 | segmentj_antidonor_knowni = (int *) MALLOC((querylength+1)*sizeof(int)); | |
6058 | segmenti_antiacceptor_knowni = (int *) MALLOC((querylength+1)*sizeof(int)); | |
6059 | #endif | |
6060 | ||
5955 | 6061 | |
5956 | 6062 | debug13(printf("***Entered find_best_path\n")); |
5957 | 6063 | |
6671 | 6777 | debug13(printf("***Exiting find_best_path\n")); |
6672 | 6778 | |
6673 | 6779 | #ifdef SUBDIVIDE_ENDS |
6780 | #ifdef HAVE_ALLOCA | |
6674 | 6781 | FREEA(npositions); |
6675 | 6782 | FREEA(coveredp); |
6676 | 6783 | FREEA(mappings); |
6784 | #else | |
6785 | FREE(npositions); | |
6786 | FREE(coveredp); | |
6787 | FREE(mappings); | |
6788 | #endif | |
6789 | #endif | |
6790 | ||
6791 | ||
6792 | #ifdef HAVE_ALLOCA | |
6793 | if (querylength <= MAX_STACK_READLENGTH) { | |
6794 | FREEA(segmenti_donor_knownpos); | |
6795 | FREEA(segmentj_acceptor_knownpos); | |
6796 | FREEA(segmentj_antidonor_knownpos); | |
6797 | FREEA(segmenti_antiacceptor_knownpos); | |
6798 | FREEA(segmenti_donor_knowni); | |
6799 | FREEA(segmentj_acceptor_knowni); | |
6800 | FREEA(segmentj_antidonor_knowni); | |
6801 | FREEA(segmenti_antiacceptor_knowni); | |
6802 | } else { | |
6803 | FREE(segmenti_donor_knownpos); | |
6804 | FREE(segmentj_acceptor_knownpos); | |
6805 | FREE(segmentj_antidonor_knownpos); | |
6806 | FREE(segmenti_antiacceptor_knownpos); | |
6807 | FREE(segmenti_donor_knowni); | |
6808 | FREE(segmentj_acceptor_knowni); | |
6809 | FREE(segmentj_antidonor_knowni); | |
6810 | FREE(segmenti_antiacceptor_knowni); | |
6811 | } | |
6812 | #else | |
6813 | FREE(segmenti_donor_knownpos); | |
6814 | FREE(segmentj_acceptor_knownpos); | |
6815 | FREE(segmentj_antidonor_knownpos); | |
6816 | FREE(segmenti_antiacceptor_knownpos); | |
6817 | FREE(segmenti_donor_knowni); | |
6818 | FREE(segmentj_acceptor_knowni); | |
6819 | FREE(segmentj_antidonor_knowni); | |
6820 | FREE(segmenti_antiacceptor_knowni); | |
6677 | 6821 | #endif |
6678 | 6822 | |
6679 | 6823 | return middle_path; |
7239 | 7383 | left_ambig_sense, left_ambig_antisense; |
7240 | 7384 | int segmenti_donor_nknown, segmentj_acceptor_nknown, |
7241 | 7385 | segmentj_antidonor_nknown, segmenti_antiacceptor_nknown; |
7386 | int *segmenti_donor_knownpos, *segmentj_acceptor_knownpos, *segmentj_antidonor_knownpos, *segmenti_antiacceptor_knownpos, | |
7387 | *segmenti_donor_knowni, *segmentj_acceptor_knowni, *segmentj_antidonor_knowni, *segmenti_antiacceptor_knowni; | |
7242 | 7388 | int j; |
7243 | 7389 | |
7244 | 7390 | #ifdef HAVE_ALLOCA |
7245 | int *segmenti_donor_knownpos = (int *) ALLOCA((querylength+1)*sizeof(int)); | |
7246 | int *segmentj_acceptor_knownpos = (int *) ALLOCA((querylength+1)*sizeof(int)); | |
7247 | int *segmentj_antidonor_knownpos = (int *) ALLOCA((querylength+1)*sizeof(int)); | |
7248 | int *segmenti_antiacceptor_knownpos = (int *) ALLOCA((querylength+1)*sizeof(int)); | |
7249 | int *segmenti_donor_knowni = (int *) ALLOCA((querylength+1)*sizeof(int)); | |
7250 | int *segmentj_acceptor_knowni = (int *) ALLOCA((querylength+1)*sizeof(int)); | |
7251 | int *segmentj_antidonor_knowni = (int *) ALLOCA((querylength+1)*sizeof(int)); | |
7252 | int *segmenti_antiacceptor_knowni = (int *) ALLOCA((querylength+1)*sizeof(int)); | |
7253 | #else | |
7254 | int segmenti_donor_knownpos[MAX_READLENGTH+1], segmentj_acceptor_knownpos[MAX_READLENGTH+1], | |
7255 | segmentj_antidonor_knownpos[MAX_READLENGTH+1], segmenti_antiacceptor_knownpos[MAX_READLENGTH+1]; | |
7256 | int segmenti_donor_knowni[MAX_READLENGTH+1], segmentj_acceptor_knowni[MAX_READLENGTH+1], | |
7257 | segmentj_antidonor_knowni[MAX_READLENGTH+1], segmenti_antiacceptor_knowni[MAX_READLENGTH+1]; | |
7391 | if (querylength <= MAX_STACK_READLENGTH) { | |
7392 | segmenti_donor_knownpos = (int *) ALLOCA((querylength+1)*sizeof(int)); | |
7393 | segmentj_acceptor_knownpos = (int *) ALLOCA((querylength+1)*sizeof(int)); | |
7394 | segmentj_antidonor_knownpos = (int *) ALLOCA((querylength+1)*sizeof(int)); | |
7395 | segmenti_antiacceptor_knownpos = (int *) ALLOCA((querylength+1)*sizeof(int)); | |
7396 | segmenti_donor_knowni = (int *) ALLOCA((querylength+1)*sizeof(int)); | |
7397 | segmentj_acceptor_knowni = (int *) ALLOCA((querylength+1)*sizeof(int)); | |
7398 | segmentj_antidonor_knowni = (int *) ALLOCA((querylength+1)*sizeof(int)); | |
7399 | segmenti_antiacceptor_knowni = (int *) ALLOCA((querylength+1)*sizeof(int)); | |
7400 | } else { | |
7401 | segmenti_donor_knownpos = (int *) MALLOC((querylength+1)*sizeof(int)); | |
7402 | segmentj_acceptor_knownpos = (int *) MALLOC((querylength+1)*sizeof(int)); | |
7403 | segmentj_antidonor_knownpos = (int *) MALLOC((querylength+1)*sizeof(int)); | |
7404 | segmenti_antiacceptor_knownpos = (int *) MALLOC((querylength+1)*sizeof(int)); | |
7405 | segmenti_donor_knowni = (int *) MALLOC((querylength+1)*sizeof(int)); | |
7406 | segmentj_acceptor_knowni = (int *) MALLOC((querylength+1)*sizeof(int)); | |
7407 | segmentj_antidonor_knowni = (int *) MALLOC((querylength+1)*sizeof(int)); | |
7408 | segmenti_antiacceptor_knowni = (int *) MALLOC((querylength+1)*sizeof(int)); | |
7409 | } | |
7410 | #else | |
7411 | segmenti_donor_knownpos = (int *) MALLOC((querylength+1)*sizeof(int)); | |
7412 | segmentj_acceptor_knownpos = (int *) MALLOC((querylength+1)*sizeof(int)); | |
7413 | segmentj_antidonor_knownpos = (int *) MALLOC((querylength+1)*sizeof(int)); | |
7414 | segmenti_antiacceptor_knownpos = (int *) MALLOC((querylength+1)*sizeof(int)); | |
7415 | segmenti_donor_knowni = (int *) MALLOC((querylength+1)*sizeof(int)); | |
7416 | segmentj_acceptor_knowni = (int *) MALLOC((querylength+1)*sizeof(int)); | |
7417 | segmentj_antidonor_knowni = (int *) MALLOC((querylength+1)*sizeof(int)); | |
7418 | segmenti_antiacceptor_knowni = (int *) MALLOC((querylength+1)*sizeof(int)); | |
7258 | 7419 | #endif |
7259 | 7420 | |
7260 | 7421 | |
7637 | 7798 | /* sense_endpoints = Intlist_push(sense_endpoints,queryend); */ |
7638 | 7799 | |
7639 | 7800 | if (plusp == true) { |
7640 | right_ambig_sense = Substring_new_ambig(/*querystart*/splice_pos,queryend, | |
7641 | /*splice_pos*/splice_pos,querylength, | |
7642 | chrnum,chroffset,chrhigh,chrlength,plusp,genestrand, | |
7643 | right_ambcoords_sense,right_amb_knowni_sense, | |
7644 | right_amb_nmismatchesj_sense,right_amb_probsj_sense, | |
7645 | /*amb_common_prob*/Doublelist_head(right_amb_probsi_sense), | |
7646 | /*amb_donor_common_p*/true,/*substring1p*/false); | |
7801 | right_ambig_sense = Substring_new_ambig_A(/*querystart*/splice_pos,queryend, | |
7802 | /*splice_pos*/splice_pos,querylength, | |
7803 | chrnum,chroffset,chrhigh,chrlength,plusp,genestrand, | |
7804 | right_ambcoords_sense,right_amb_knowni_sense, | |
7805 | right_amb_nmismatchesj_sense,right_amb_probsj_sense, | |
7806 | /*amb_common_prob*/Doublelist_head(right_amb_probsi_sense), | |
7807 | /*substring1p*/false); | |
7647 | 7808 | } else { |
7648 | right_ambig_sense = Substring_new_ambig(/*querystart*/querylength - queryend,querylength - splice_pos, | |
7649 | /*splice_pos*/querylength - splice_pos,querylength, | |
7650 | chrnum,chroffset,chrhigh,chrlength,plusp,genestrand, | |
7651 | right_ambcoords_sense,right_amb_knowni_sense, | |
7652 | right_amb_nmismatchesj_sense,right_amb_probsj_sense, | |
7653 | /*amb_common_prob*/Doublelist_head(right_amb_probsi_sense), | |
7654 | /*amb_donor_common_p*/false,/*substring1p*/true); | |
7809 | right_ambig_sense = Substring_new_ambig_D(/*querystart*/querylength - queryend,querylength - splice_pos, | |
7810 | /*splice_pos*/querylength - splice_pos,querylength, | |
7811 | chrnum,chroffset,chrhigh,chrlength,plusp,genestrand, | |
7812 | right_ambcoords_sense,right_amb_knowni_sense, | |
7813 | right_amb_nmismatchesj_sense,right_amb_probsj_sense, | |
7814 | /*amb_common_prob*/Doublelist_head(right_amb_probsi_sense), | |
7815 | /*substring1p*/true); | |
7655 | 7816 | } |
7656 | 7817 | } |
7657 | 7818 | |
7712 | 7873 | /* antisense_endpoints = Intlist_push(antisense_endpoints,queryend); */ |
7713 | 7874 | |
7714 | 7875 | if (plusp == true) { |
7715 | right_ambig_antisense = Substring_new_ambig(/*querystart*/splice_pos,queryend, | |
7716 | /*splice_pos*/splice_pos,querylength, | |
7717 | chrnum,chroffset,chrhigh,chrlength,plusp,genestrand, | |
7718 | right_ambcoords_antisense,right_amb_knowni_antisense, | |
7719 | right_amb_nmismatchesj_antisense,right_amb_probsj_antisense, | |
7720 | /*amb_common_prob*/Doublelist_head(right_amb_probsi_antisense), | |
7721 | /*amb_donor_common_p*/false,/*substring1p*/false); | |
7876 | right_ambig_antisense = Substring_new_ambig_D(/*querystart*/splice_pos,queryend, | |
7877 | /*splice_pos*/splice_pos,querylength, | |
7878 | chrnum,chroffset,chrhigh,chrlength,plusp,genestrand, | |
7879 | right_ambcoords_antisense,right_amb_knowni_antisense, | |
7880 | right_amb_nmismatchesj_antisense,right_amb_probsj_antisense, | |
7881 | /*amb_common_prob*/Doublelist_head(right_amb_probsi_antisense), | |
7882 | /*substring1p*/false); | |
7722 | 7883 | } else { |
7723 | right_ambig_antisense = Substring_new_ambig(/*querystart*/querylength - queryend,querylength - splice_pos, | |
7724 | /*splice_pos*/querylength - splice_pos,querylength, | |
7725 | chrnum,chroffset,chrhigh,chrlength,plusp,genestrand, | |
7726 | right_ambcoords_antisense,right_amb_knowni_antisense, | |
7727 | right_amb_nmismatchesj_antisense,right_amb_probsj_antisense, | |
7728 | /*amb_common_prob*/Doublelist_head(right_amb_probsi_antisense), | |
7729 | /*amb_donor_common_p*/true,/*substring1p*/true); | |
7884 | right_ambig_antisense = Substring_new_ambig_A(/*querystart*/querylength - queryend,querylength - splice_pos, | |
7885 | /*splice_pos*/querylength - splice_pos,querylength, | |
7886 | chrnum,chroffset,chrhigh,chrlength,plusp,genestrand, | |
7887 | right_ambcoords_antisense,right_amb_knowni_antisense, | |
7888 | right_amb_nmismatchesj_antisense,right_amb_probsj_antisense, | |
7889 | /*amb_common_prob*/Doublelist_head(right_amb_probsi_antisense), | |
7890 | /*substring1p*/true); | |
7730 | 7891 | } |
7731 | 7892 | } |
7732 | 7893 | |
7835 | 7996 | /* sense_endpoints = Intlist_push(sense_endpoints,querystart); */ |
7836 | 7997 | |
7837 | 7998 | if (plusp == true) { |
7838 | left_ambig_sense = Substring_new_ambig(querystart,/*queryend*/splice_pos, | |
7839 | /*splice_pos*/splice_pos,querylength, | |
7840 | chrnum,chroffset,chrhigh,chrlength,plusp,genestrand, | |
7841 | left_ambcoords_sense,left_amb_knowni_sense, | |
7842 | left_amb_nmismatchesi_sense,left_amb_probsi_sense, | |
7843 | /*amb_common_prob*/Doublelist_head(left_amb_probsj_sense), | |
7844 | /*amb_donor_common_p*/false,/*substring1p*/true); | |
7999 | left_ambig_sense = Substring_new_ambig_D(querystart,/*queryend*/splice_pos, | |
8000 | /*splice_pos*/splice_pos,querylength, | |
8001 | chrnum,chroffset,chrhigh,chrlength,plusp,genestrand, | |
8002 | left_ambcoords_sense,left_amb_knowni_sense, | |
8003 | left_amb_nmismatchesi_sense,left_amb_probsi_sense, | |
8004 | /*amb_common_prob*/Doublelist_head(left_amb_probsj_sense), | |
8005 | /*substring1p*/true); | |
7845 | 8006 | } else { |
7846 | left_ambig_sense = Substring_new_ambig(querylength - splice_pos,/*queryend*/querylength - querystart, | |
7847 | /*splice_pos*/querylength - splice_pos,querylength, | |
7848 | chrnum,chroffset,chrhigh,chrlength,plusp,genestrand, | |
7849 | left_ambcoords_sense,left_amb_knowni_sense, | |
7850 | left_amb_nmismatchesi_sense,left_amb_probsi_sense, | |
7851 | /*amb_common_prob*/Doublelist_head(left_amb_probsj_sense), | |
7852 | /*amb_donor_common_p*/true,/*substring1p*/false); | |
8007 | left_ambig_sense = Substring_new_ambig_A(querylength - splice_pos,/*queryend*/querylength - querystart, | |
8008 | /*splice_pos*/querylength - splice_pos,querylength, | |
8009 | chrnum,chroffset,chrhigh,chrlength,plusp,genestrand, | |
8010 | left_ambcoords_sense,left_amb_knowni_sense, | |
8011 | left_amb_nmismatchesi_sense,left_amb_probsi_sense, | |
8012 | /*amb_common_prob*/Doublelist_head(left_amb_probsj_sense), | |
8013 | /*substring1p*/false); | |
7853 | 8014 | } |
7854 | 8015 | } |
7855 | 8016 | |
7934 | 8095 | /* antisense_endpoints = Intlist_push(antisense_endpoints,querystart); */ |
7935 | 8096 | |
7936 | 8097 | if (plusp == true) { |
7937 | left_ambig_antisense = Substring_new_ambig(querystart,/*queryend*/splice_pos, | |
7938 | /*splice_pos*/splice_pos,querylength, | |
7939 | chrnum,chroffset,chrhigh,chrlength,plusp,genestrand, | |
7940 | left_ambcoords_antisense,left_amb_knowni_antisense, | |
7941 | left_amb_nmismatchesi_antisense,left_amb_probsi_antisense, | |
7942 | /*amb_common_prob*/Doublelist_head(left_amb_probsj_antisense), | |
7943 | /*amb_donor_common_p*/true,/*substring1p*/true); | |
8098 | left_ambig_antisense = Substring_new_ambig_A(querystart,/*queryend*/splice_pos, | |
8099 | /*splice_pos*/splice_pos,querylength, | |
8100 | chrnum,chroffset,chrhigh,chrlength,plusp,genestrand, | |
8101 | left_ambcoords_antisense,left_amb_knowni_antisense, | |
8102 | left_amb_nmismatchesi_antisense,left_amb_probsi_antisense, | |
8103 | /*amb_common_prob*/Doublelist_head(left_amb_probsj_antisense), | |
8104 | /*substring1p*/true); | |
7944 | 8105 | } else { |
7945 | left_ambig_antisense = Substring_new_ambig(querylength - splice_pos,/*queryend*/querylength - querystart, | |
7946 | /*splice_pos*/querylength - splice_pos,querylength, | |
7947 | chrnum,chroffset,chrhigh,chrlength,plusp,genestrand, | |
7948 | left_ambcoords_antisense,left_amb_knowni_antisense, | |
7949 | left_amb_nmismatchesi_antisense,left_amb_probsi_antisense, | |
7950 | /*amb_common_prob*/Doublelist_head(left_amb_probsj_antisense), | |
7951 | /*amb_donor_common_p*/false,/*substring1p*/false); | |
8106 | left_ambig_antisense = Substring_new_ambig_D(querylength - splice_pos,/*queryend*/querylength - querystart, | |
8107 | /*splice_pos*/querylength - splice_pos,querylength, | |
8108 | chrnum,chroffset,chrhigh,chrlength,plusp,genestrand, | |
8109 | left_ambcoords_antisense,left_amb_knowni_antisense, | |
8110 | left_amb_nmismatchesi_antisense,left_amb_probsi_antisense, | |
8111 | /*amb_common_prob*/Doublelist_head(left_amb_probsj_antisense), | |
8112 | /*substring1p*/false); | |
7952 | 8113 | } |
7953 | 8114 | } |
7954 | 8115 | |
8129 | 8290 | Univdiag_free(&diagonal); |
8130 | 8291 | } |
8131 | 8292 | List_free(&super_path); |
8293 | ||
8294 | #ifdef HAVE_ALLOCA | |
8295 | if (querylength <= MAX_STACK_READLENGTH) { | |
8296 | FREEA(segmenti_donor_knownpos); | |
8297 | FREEA(segmentj_acceptor_knownpos); | |
8298 | FREEA(segmentj_antidonor_knownpos); | |
8299 | FREEA(segmenti_antiacceptor_knownpos); | |
8300 | FREEA(segmenti_donor_knowni); | |
8301 | FREEA(segmentj_acceptor_knowni); | |
8302 | FREEA(segmentj_antidonor_knowni); | |
8303 | FREEA(segmenti_antiacceptor_knowni); | |
8304 | } else { | |
8305 | FREE(segmenti_donor_knownpos); | |
8306 | FREE(segmentj_acceptor_knownpos); | |
8307 | FREE(segmentj_antidonor_knownpos); | |
8308 | FREE(segmenti_antiacceptor_knownpos); | |
8309 | FREE(segmenti_donor_knowni); | |
8310 | FREE(segmentj_acceptor_knowni); | |
8311 | FREE(segmentj_antidonor_knowni); | |
8312 | FREE(segmenti_antiacceptor_knowni); | |
8313 | } | |
8314 | #else | |
8315 | FREE(segmenti_donor_knownpos); | |
8316 | FREE(segmentj_acceptor_knownpos); | |
8317 | FREE(segmentj_antidonor_knownpos); | |
8318 | FREE(segmenti_antiacceptor_knownpos); | |
8319 | FREE(segmenti_donor_knowni); | |
8320 | FREE(segmentj_acceptor_knowni); | |
8321 | FREE(segmentj_antidonor_knowni); | |
8322 | FREE(segmenti_antiacceptor_knowni); | |
8323 | #endif | |
8132 | 8324 | |
8133 | 8325 | return hits; |
8134 | 8326 | } |
0 | static char rcsid[] = "$Id: sedgesort.c 195760 2016-08-04 00:12:04Z twu $"; | |
0 | static char rcsid[] = "$Id: sedgesort.c 196273 2016-08-12 15:15:06Z twu $"; | |
1 | 1 | #ifdef HAVE_CONFIG_H |
2 | 2 | #include <config.h> |
3 | 3 | #endif |
0 | /* $Id: sedgesort.h 195760 2016-08-04 00:12:04Z twu $ */ | |
0 | /* $Id: sedgesort.h 196273 2016-08-12 15:15:06Z twu $ */ | |
1 | 1 | #ifndef SEDGESORT_INCLUDED |
2 | 2 | #define SEDGESORT_INCLUDED |
3 | 3 |
0 | static char rcsid[] = "$Id: shortread.c 195760 2016-08-04 00:12:04Z twu $"; | |
0 | static char rcsid[] = "$Id: shortread.c 196410 2016-08-16 15:57:57Z twu $"; | |
1 | 1 | #ifdef HAVE_CONFIG_H |
2 | 2 | #include <config.h> |
3 | 3 | #endif |
199 | 199 | static char Header[HEADERLEN]; |
200 | 200 | static char Discard[DISCARDLEN]; |
201 | 201 | |
202 | static char Read1[MAX_READLENGTH+1]; | |
203 | static char Read2[MAX_READLENGTH+1]; | |
204 | static char Quality[MAX_READLENGTH+1]; | |
202 | ||
203 | /* input_oneline() can actually read longer than this */ | |
204 | #define MAX_EXPECTED_READLENGTH 300 | |
205 | ||
206 | static char Read1[MAX_EXPECTED_READLENGTH+1]; | |
207 | static char Read2[MAX_EXPECTED_READLENGTH+1]; | |
208 | static char Quality[MAX_EXPECTED_READLENGTH+1]; | |
205 | 209 | |
206 | 210 | |
207 | 211 | /* The first element of Sequence is always the null character, to mark |
1486 | 1490 | *longstring = (char *) NULL; |
1487 | 1491 | |
1488 | 1492 | ptr = &(Start[0]); |
1489 | remainder = (&(Start[MAX_READLENGTH]) - ptr)/sizeof(char); | |
1493 | remainder = (&(Start[MAX_EXPECTED_READLENGTH]) - ptr)/sizeof(char); | |
1490 | 1494 | if (*nextchar == EOF || (possible_fasta_header_p == true && (*nextchar == '>' || *nextchar == '+'))) { |
1491 | 1495 | debug(printf("nchars %d: EOF or > or +: Returning 0\n",*nchars)); |
1492 | 1496 | return 0; |
1533 | 1537 | debug(printf("No line feed, but not end of file. Using Intlist_T.\n")); |
1534 | 1538 | intlist = (Intlist_T) NULL; |
1535 | 1539 | i = 0; |
1536 | while (i <= MAX_READLENGTH && Start[i] != '\0') { | |
1540 | while (i <= MAX_EXPECTED_READLENGTH && Start[i] != '\0') { | |
1537 | 1541 | debug(printf("Pushing %c\n",Start[i])); |
1538 | 1542 | intlist = Intlist_push_in(intlist,Start[i]); |
1539 | 1543 | i++; |
1584 | 1588 | *longstring = (char *) NULL; |
1585 | 1589 | |
1586 | 1590 | ptr = &(Start[0]); |
1587 | remainder = (&(Start[MAX_READLENGTH]) - ptr)/sizeof(char); | |
1591 | remainder = (&(Start[MAX_EXPECTED_READLENGTH]) - ptr)/sizeof(char); | |
1588 | 1592 | if (*nextchar == EOF || *nextchar == '\0' || |
1589 | 1593 | (possible_fasta_header_p == true && (*nextchar == '>' || *nextchar == '+'))) { |
1590 | 1594 | debug(printf("EOF or > or +: Returning 0\n")); |
1631 | 1635 | debug(printf("No line feed, but not end of file. Using Intlist_T.\n")); |
1632 | 1636 | intlist = (Intlist_T) NULL; |
1633 | 1637 | i = 0; |
1634 | while (i <= MAX_READLENGTH && Start[i] != '\0') { | |
1638 | while (i <= MAX_EXPECTED_READLENGTH && Start[i] != '\0') { | |
1635 | 1639 | debug(printf("Pushing %c\n",Start[i])); |
1636 | 1640 | intlist = Intlist_push_in(intlist,Start[i]); |
1637 | 1641 | i++; |
1681 | 1685 | *longstring = (char *) NULL; |
1682 | 1686 | |
1683 | 1687 | ptr = &(Start[0]); |
1684 | remainder = (&(Start[MAX_READLENGTH]) - ptr)/sizeof(char); | |
1688 | remainder = (&(Start[MAX_EXPECTED_READLENGTH]) - ptr)/sizeof(char); | |
1685 | 1689 | if (*nextchar == EOF || (possible_fasta_header_p == true && (*nextchar == '>' || *nextchar == '+'))) { |
1686 | 1690 | debug(printf("EOF or > or +: Returning 0\n")); |
1687 | 1691 | return 0; |
1730 | 1734 | debug(printf("No line feed, but not end of file. Using Intlist_T.\n")); |
1731 | 1735 | intlist = (Intlist_T) NULL; |
1732 | 1736 | i = 0; |
1733 | while (i <= MAX_READLENGTH && Start[i] != '\0') { | |
1737 | while (i <= MAX_EXPECTED_READLENGTH && Start[i] != '\0') { | |
1734 | 1738 | debug(printf("Pushing %c\n",Start[i])); |
1735 | 1739 | intlist = Intlist_push_in(intlist,Start[i]); |
1736 | 1740 | i++; |
1791 | 1795 | *longstring = (char *) NULL; |
1792 | 1796 | |
1793 | 1797 | ptr = &(Start[0]); |
1794 | remainder = (&(Start[MAX_READLENGTH]) - ptr)/sizeof(char); | |
1798 | remainder = (&(Start[MAX_EXPECTED_READLENGTH]) - ptr)/sizeof(char); | |
1795 | 1799 | if (*nextchar == EOF || (possible_fasta_header_p == true && (*nextchar == '>' || *nextchar == '+'))) { |
1796 | 1800 | debug(printf("EOF or > or +: Returning 0\n")); |
1797 | 1801 | return 0; |
1840 | 1844 | debug(printf("No line feed, but not end of file. Using Intlist_T.\n")); |
1841 | 1845 | intlist = (Intlist_T) NULL; |
1842 | 1846 | i = 0; |
1843 | while (i <= MAX_READLENGTH && Start[i] != '\0') { | |
1847 | while (i <= MAX_EXPECTED_READLENGTH && Start[i] != '\0') { | |
1844 | 1848 | debug(printf("Pushing %c\n",Start[i])); |
1845 | 1849 | intlist = Intlist_push_in(intlist,Start[i]); |
1846 | 1850 | i++; |
0 | static char rcsid[] = "$Id: splice.c 195753 2016-08-03 23:44:46Z twu $"; | |
0 | static char rcsid[] = "$Id: splice.c 196431 2016-08-16 20:19:22Z twu $"; | |
1 | 1 | #ifdef HAVE_CONFIG_H |
2 | 2 | #include <config.h> |
3 | 3 | #endif |
140 | 140 | int donori_nsites, acceptorj_nsites, antiacceptori_nsites, antidonorj_nsites; |
141 | 141 | int *donori_positions, *acceptorj_positions, *antiacceptori_positions, *antidonorj_positions; |
142 | 142 | int *donori_knowni, *acceptorj_knowni, *antiacceptori_knowni, *antidonorj_knowni; |
143 | int *donor_positions_alloc, *acceptor_positions_alloc, *donor_knowni_alloc, *acceptor_knowni_alloc; | |
144 | ||
143 | 145 | |
144 | 146 | #ifdef HAVE_ALLOCA |
145 | int *donor_positions_alloc = (int *) alloca((querylength+1)*sizeof(int)); | |
146 | int *acceptor_positions_alloc = (int *) alloca((querylength+1)*sizeof(int)); | |
147 | int *donor_knowni_alloc = (int *) alloca((querylength+1)*sizeof(int)); | |
148 | int *acceptor_knowni_alloc = (int *) alloca((querylength+1)*sizeof(int)); | |
147 | if (querylength <= MAX_STACK_READLENGTH) { | |
148 | donor_positions_alloc = (int *) ALLOCA((querylength+1)*sizeof(int)); | |
149 | acceptor_positions_alloc = (int *) ALLOCA((querylength+1)*sizeof(int)); | |
150 | donor_knowni_alloc = (int *) ALLOCA((querylength+1)*sizeof(int)); | |
151 | acceptor_knowni_alloc = (int *) ALLOCA((querylength+1)*sizeof(int)); | |
152 | } else { | |
153 | donor_positions_alloc = (int *) MALLOC((querylength+1)*sizeof(int)); | |
154 | acceptor_positions_alloc = (int *) MALLOC((querylength+1)*sizeof(int)); | |
155 | donor_knowni_alloc = (int *) MALLOC((querylength+1)*sizeof(int)); | |
156 | acceptor_knowni_alloc = (int *) MALLOC((querylength+1)*sizeof(int)); | |
157 | } | |
149 | 158 | #else |
150 | int donor_positions_alloc[MAX_READLENGTH+1], acceptor_positions_alloc[MAX_READLENGTH+1]; | |
151 | int donor_knowni_alloc[MAX_READLENGTH+1], acceptor_knowni_alloc[MAX_READLENGTH+1]; | |
159 | donor_positions_alloc = (int *) MALLOC((querylength+1)*sizeof(int)); | |
160 | acceptor_positions_alloc = (int *) MALLOC((querylength+1)*sizeof(int)); | |
161 | donor_knowni_alloc = (int *) MALLOC((querylength+1)*sizeof(int)); | |
162 | acceptor_knowni_alloc = (int *) MALLOC((querylength+1)*sizeof(int)); | |
152 | 163 | #endif |
153 | 164 | |
154 | 165 | |
404 | 415 | |
405 | 416 | debug1(printf("best_knowni_i is %d and best_knowni_j is %d\n",*best_knowni_i,*best_knowni_j)); |
406 | 417 | |
418 | #ifdef HAVE_ALLOCA | |
419 | if (querylength <= MAX_STACK_READLENGTH) { | |
420 | FREEA(donor_positions_alloc); | |
421 | FREEA(acceptor_positions_alloc); | |
422 | FREEA(donor_knowni_alloc); | |
423 | FREEA(acceptor_knowni_alloc); | |
424 | } else { | |
425 | FREE(donor_positions_alloc); | |
426 | FREE(acceptor_positions_alloc); | |
427 | FREE(donor_knowni_alloc); | |
428 | FREE(acceptor_knowni_alloc); | |
429 | } | |
430 | #else | |
431 | FREE(donor_positions_alloc); | |
432 | FREE(acceptor_positions_alloc); | |
433 | FREE(donor_knowni_alloc); | |
434 | FREE(acceptor_knowni_alloc); | |
435 | #endif | |
436 | ||
437 | ||
407 | 438 | if (*best_prob_i > 0.95 && *best_prob_j > 0.70) { |
408 | 439 | debug1(printf("Returning %d with probi %f and probj %f\n",best_splice_pos,*best_prob_i,*best_prob_j)); |
409 | 440 | debug1(printf("nmismatches %d and %d\n",*best_nmismatches_i,*best_nmismatches_j)); |
450 | 481 | int donori_nsites, acceptorj_nsites, antiacceptori_nsites, antidonorj_nsites; |
451 | 482 | int *donori_positions, *acceptorj_positions, *antiacceptori_positions, *antidonorj_positions; |
452 | 483 | int *donori_knowni, *acceptorj_knowni, *antiacceptori_knowni, *antidonorj_knowni; |
484 | int *donor_positions_alloc, *acceptor_positions_alloc, *donor_knowni_alloc, *acceptor_knowni_alloc; | |
485 | ||
453 | 486 | |
454 | 487 | #ifdef HAVE_ALLOCA |
455 | int *donor_positions_alloc = (int *) alloca((querylength+1)*sizeof(int)); | |
456 | int *acceptor_positions_alloc = (int *) alloca((querylength+1)*sizeof(int)); | |
457 | int *donor_knowni_alloc = (int *) alloca((querylength+1)*sizeof(int)); | |
458 | int *acceptor_knowni_alloc = (int *) alloca((querylength+1)*sizeof(int)); | |
488 | if (querylength <= MAX_STACK_READLENGTH) { | |
489 | donor_positions_alloc = (int *) ALLOCA((querylength+1)*sizeof(int)); | |
490 | acceptor_positions_alloc = (int *) ALLOCA((querylength+1)*sizeof(int)); | |
491 | donor_knowni_alloc = (int *) ALLOCA((querylength+1)*sizeof(int)); | |
492 | acceptor_knowni_alloc = (int *) ALLOCA((querylength+1)*sizeof(int)); | |
493 | } else { | |
494 | donor_positions_alloc = (int *) MALLOC((querylength+1)*sizeof(int)); | |
495 | acceptor_positions_alloc = (int *) MALLOC((querylength+1)*sizeof(int)); | |
496 | donor_knowni_alloc = (int *) MALLOC((querylength+1)*sizeof(int)); | |
497 | acceptor_knowni_alloc = (int *) MALLOC((querylength+1)*sizeof(int)); | |
498 | } | |
459 | 499 | #else |
460 | int donor_positions_alloc[MAX_READLENGTH+1], acceptor_positions_alloc[MAX_READLENGTH+1]; | |
461 | int donor_knowni_alloc[MAX_READLENGTH+1], acceptor_knowni_alloc[MAX_READLENGTH+1]; | |
500 | donor_positions_alloc = (int *) MALLOC((querylength+1)*sizeof(int)); | |
501 | acceptor_positions_alloc = (int *) MALLOC((querylength+1)*sizeof(int)); | |
502 | donor_knowni_alloc = (int *) MALLOC((querylength+1)*sizeof(int)); | |
503 | acceptor_knowni_alloc = (int *) MALLOC((querylength+1)*sizeof(int)); | |
462 | 504 | #endif |
463 | 505 | |
464 | 506 | debug1(printf("Splice_resolve_antisense: Getting genome at lefti %u and leftj %u (diff: %d), range %d..%d\n", |
710 | 752 | } |
711 | 753 | } |
712 | 754 | |
755 | #ifdef HAVE_ALLOCA | |
756 | if (querylength <= MAX_STACK_READLENGTH) { | |
757 | FREEA(donor_positions_alloc); | |
758 | FREEA(acceptor_positions_alloc); | |
759 | FREEA(donor_knowni_alloc); | |
760 | FREEA(acceptor_knowni_alloc); | |
761 | } else { | |
762 | FREE(donor_positions_alloc); | |
763 | FREE(acceptor_positions_alloc); | |
764 | FREE(donor_knowni_alloc); | |
765 | FREE(acceptor_knowni_alloc); | |
766 | } | |
767 | #else | |
768 | FREE(donor_positions_alloc); | |
769 | FREE(acceptor_positions_alloc); | |
770 | FREE(donor_knowni_alloc); | |
771 | FREE(acceptor_knowni_alloc); | |
772 | #endif | |
773 | ||
774 | ||
713 | 775 | debug1(printf("best_knowni_i is %d and best_knowni_j is %d\n",*best_knowni_i,*best_knowni_j)); |
714 | 776 | |
715 | 777 | if (*best_prob_i > 0.95 && *best_prob_j > 0.70) { |
771 | 833 | int donori_nsites, acceptorj_nsites, antiacceptori_nsites, antidonorj_nsites; |
772 | 834 | int *donori_positions, *acceptorj_positions, *antiacceptori_positions, *antidonorj_positions; |
773 | 835 | int *donori_knowni, *acceptorj_knowni, *antiacceptori_knowni, *antidonorj_knowni; |
836 | int *donor_positions_alloc, *acceptor_positions_alloc, *donor_knowni_alloc, *acceptor_knowni_alloc; | |
837 | ||
774 | 838 | |
775 | 839 | #ifdef HAVE_ALLOCA |
776 | int *donor_positions_alloc = (int *) alloca((querylength+1)*sizeof(int)); | |
777 | int *acceptor_positions_alloc = (int *) alloca((querylength+1)*sizeof(int)); | |
778 | int *donor_knowni_alloc = (int *) alloca((querylength+1)*sizeof(int)); | |
779 | int *acceptor_knowni_alloc = (int *) alloca((querylength+1)*sizeof(int)); | |
840 | if (querylength <= MAX_STACK_READLENGTH) { | |
841 | donor_positions_alloc = (int *) ALLOCA((querylength+1)*sizeof(int)); | |
842 | acceptor_positions_alloc = (int *) ALLOCA((querylength+1)*sizeof(int)); | |
843 | donor_knowni_alloc = (int *) ALLOCA((querylength+1)*sizeof(int)); | |
844 | acceptor_knowni_alloc = (int *) ALLOCA((querylength+1)*sizeof(int)); | |
845 | } else { | |
846 | donor_positions_alloc = (int *) MALLOC((querylength+1)*sizeof(int)); | |
847 | acceptor_positions_alloc = (int *) MALLOC((querylength+1)*sizeof(int)); | |
848 | donor_knowni_alloc = (int *) MALLOC((querylength+1)*sizeof(int)); | |
849 | acceptor_knowni_alloc = (int *) MALLOC((querylength+1)*sizeof(int)); | |
850 | } | |
780 | 851 | #else |
781 | int donor_positions_alloc[MAX_READLENGTH+1], acceptor_positions_alloc[MAX_READLENGTH+1]; | |
782 | int donor_knowni_alloc[MAX_READLENGTH+1], acceptor_knowni_alloc[MAX_READLENGTH+1]; | |
783 | #endif | |
784 | ||
852 | donor_positions_alloc = (int *) MALLOC((querylength+1)*sizeof(int)); | |
853 | acceptor_positions_alloc = (int *) MALLOC((querylength+1)*sizeof(int)); | |
854 | donor_knowni_alloc = (int *) MALLOC((querylength+1)*sizeof(int)); | |
855 | acceptor_knowni_alloc = (int *) MALLOC((querylength+1)*sizeof(int)); | |
856 | #endif | |
785 | 857 | |
786 | 858 | debug1(printf("Splice_solve_single: Getting genome at lefti %u and leftj %u (diff: %d)\n", |
787 | 859 | segmenti_left,segmentj_left,segmentj_left-segmenti_left)); |
1083 | 1155 | |
1084 | 1156 | if (sufficient1p && sufficient2p) { |
1085 | 1157 | *nhits += 1; |
1086 | return List_push(hits,(void *) Stage3end_new_splice(&(*found_score),best_segmenti_nmismatches,best_segmentj_nmismatches, | |
1158 | hits = List_push(hits,(void *) Stage3end_new_splice(&(*found_score),best_segmenti_nmismatches,best_segmentj_nmismatches, | |
1087 | 1159 | donor,acceptor,best_donor_prob,best_acceptor_prob, |
1088 | 1160 | /*distance*/segmentj_left - segmenti_left, |
1089 | 1161 | /*shortdistancep*/true,splicing_penalty,querylength, |
1093 | 1165 | /*amb_probs_donor*/NULL,/*amb_probs_acceptor*/NULL, |
1094 | 1166 | /*copy_donor_p*/false,/*copy_acceptor_p*/false,first_read_p,sensedir, |
1095 | 1167 | sarrayp)); |
1168 | /* return hits; */ | |
1096 | 1169 | } else if (subs_or_indels_p == true) { |
1097 | 1170 | if (donor != NULL) Substring_free(&donor); |
1098 | 1171 | if (acceptor != NULL) Substring_free(&acceptor); |
1099 | return hits; | |
1172 | /* return hits; */ | |
1100 | 1173 | } else if (donor_support < LOWPROB_SUPPORT || acceptor_support < LOWPROB_SUPPORT) { |
1101 | 1174 | if (donor != NULL) Substring_free(&donor); |
1102 | 1175 | if (acceptor != NULL) Substring_free(&acceptor); |
1103 | return hits; | |
1176 | /* return hits; */ | |
1104 | 1177 | } else if (sufficient1p || sufficient2p) { |
1105 | 1178 | *lowprob = List_push(*lowprob, |
1106 | 1179 | (void *) Stage3end_new_splice(&(*found_score),best_segmenti_nmismatches,best_segmentj_nmismatches, |
1113 | 1186 | /*amb_probs_donor*/NULL,/*amb_probs_acceptor*/NULL, |
1114 | 1187 | /*copy_donor_p*/false,/*copy_acceptor_p*/false,first_read_p,sensedir, |
1115 | 1188 | sarrayp)); |
1116 | return hits; | |
1189 | /* return hits; */ | |
1117 | 1190 | } else { |
1118 | 1191 | if (donor != NULL) Substring_free(&donor); |
1119 | 1192 | if (acceptor != NULL) Substring_free(&acceptor); |
1193 | /* ? return hits; */ | |
1120 | 1194 | } |
1121 | 1195 | } |
1122 | 1196 | |
1153 | 1227 | sufficient2p = sufficient_splice_prob_local(donor_support,best_segmentj_nmismatches,best_donor_prob); |
1154 | 1228 | if (sufficient1p && sufficient2p) { |
1155 | 1229 | *nhits += 1; |
1156 | return List_push(hits,(void *) Stage3end_new_splice(&(*found_score),best_segmentj_nmismatches,best_segmenti_nmismatches, | |
1230 | hits = List_push(hits,(void *) Stage3end_new_splice(&(*found_score),best_segmentj_nmismatches,best_segmenti_nmismatches, | |
1157 | 1231 | donor,acceptor,best_donor_prob,best_acceptor_prob, |
1158 | 1232 | /*distance*/segmentj_left - segmenti_left, |
1159 | 1233 | /*shortdistancep*/true,splicing_penalty,querylength, |
1163 | 1237 | /*amb_probs_donor*/NULL,/*amb_probs_acceptor*/NULL, |
1164 | 1238 | /*copy_donor_p*/false,/*copy_acceptor_p*/false,first_read_p,sensedir, |
1165 | 1239 | sarrayp)); |
1240 | /* return hits; */ | |
1166 | 1241 | } else if (subs_or_indels_p == true) { |
1167 | 1242 | if (donor != NULL) Substring_free(&donor); |
1168 | 1243 | if (acceptor != NULL) Substring_free(&acceptor); |
1169 | return hits; | |
1244 | /* return hits; */ | |
1170 | 1245 | } else if (donor_support < LOWPROB_SUPPORT || acceptor_support < LOWPROB_SUPPORT) { |
1171 | 1246 | if (donor != NULL) Substring_free(&donor); |
1172 | 1247 | if (acceptor != NULL) Substring_free(&acceptor); |
1173 | return hits; | |
1248 | /* return hits; */ | |
1174 | 1249 | } else if (sufficient1p || sufficient2p) { |
1175 | 1250 | *lowprob = List_push(*lowprob, |
1176 | 1251 | (void *) Stage3end_new_splice(&(*found_score),best_segmentj_nmismatches,best_segmenti_nmismatches, |
1183 | 1258 | /*amb_probs_donor*/NULL,/*amb_probs_acceptor*/NULL, |
1184 | 1259 | /*copy_donor_p*/false,/*copy_acceptor_p*/false,first_read_p,sensedir, |
1185 | 1260 | sarrayp)); |
1186 | return hits; | |
1261 | /* return hits; */ | |
1187 | 1262 | } else { |
1188 | 1263 | if (donor != NULL) Substring_free(&donor); |
1189 | 1264 | if (acceptor != NULL) Substring_free(&acceptor); |
1190 | return hits; | |
1191 | } | |
1192 | } | |
1193 | } | |
1194 | } | |
1195 | } | |
1196 | ||
1197 | debug1(printf("Splice_solve_single_sense fail\n")); | |
1265 | /* ? return hits; */ | |
1266 | } | |
1267 | } | |
1268 | } | |
1269 | } | |
1270 | } | |
1271 | ||
1272 | #ifdef HAVE_ALLOCA | |
1273 | if (querylength <= MAX_STACK_READLENGTH) { | |
1274 | FREEA(donor_positions_alloc); | |
1275 | FREEA(acceptor_positions_alloc); | |
1276 | FREEA(donor_knowni_alloc); | |
1277 | FREEA(acceptor_knowni_alloc); | |
1278 | } else { | |
1279 | FREE(donor_positions_alloc); | |
1280 | FREE(acceptor_positions_alloc); | |
1281 | FREE(donor_knowni_alloc); | |
1282 | FREE(acceptor_knowni_alloc); | |
1283 | } | |
1284 | #else | |
1285 | FREE(donor_positions_alloc); | |
1286 | FREE(acceptor_positions_alloc); | |
1287 | FREE(donor_knowni_alloc); | |
1288 | FREE(acceptor_knowni_alloc); | |
1289 | #endif | |
1290 | ||
1198 | 1291 | return hits; |
1199 | 1292 | } |
1200 | 1293 | |
1234 | 1327 | int donori_nsites, acceptorj_nsites, antiacceptori_nsites, antidonorj_nsites; |
1235 | 1328 | int *donori_positions, *acceptorj_positions, *antiacceptori_positions, *antidonorj_positions; |
1236 | 1329 | int *donori_knowni, *acceptorj_knowni, *antiacceptori_knowni, *antidonorj_knowni; |
1330 | int *donor_positions_alloc, *acceptor_positions_alloc, *donor_knowni_alloc, *acceptor_knowni_alloc; | |
1331 | ||
1237 | 1332 | |
1238 | 1333 | #ifdef HAVE_ALLOCA |
1239 | int *donor_positions_alloc = (int *) alloca((querylength+1)*sizeof(int)); | |
1240 | int *acceptor_positions_alloc = (int *) alloca((querylength+1)*sizeof(int)); | |
1241 | int *donor_knowni_alloc = (int *) alloca((querylength+1)*sizeof(int)); | |
1242 | int *acceptor_knowni_alloc = (int *) alloca((querylength+1)*sizeof(int)); | |
1334 | if (querylength <= MAX_STACK_READLENGTH) { | |
1335 | donor_positions_alloc = (int *) ALLOCA((querylength+1)*sizeof(int)); | |
1336 | acceptor_positions_alloc = (int *) ALLOCA((querylength+1)*sizeof(int)); | |
1337 | donor_knowni_alloc = (int *) ALLOCA((querylength+1)*sizeof(int)); | |
1338 | acceptor_knowni_alloc = (int *) ALLOCA((querylength+1)*sizeof(int)); | |
1339 | } else { | |
1340 | donor_positions_alloc = (int *) MALLOC((querylength+1)*sizeof(int)); | |
1341 | acceptor_positions_alloc = (int *) MALLOC((querylength+1)*sizeof(int)); | |
1342 | donor_knowni_alloc = (int *) MALLOC((querylength+1)*sizeof(int)); | |
1343 | acceptor_knowni_alloc = (int *) MALLOC((querylength+1)*sizeof(int)); | |
1344 | } | |
1243 | 1345 | #else |
1244 | int donor_positions_alloc[MAX_READLENGTH+1], acceptor_positions_alloc[MAX_READLENGTH+1]; | |
1245 | int donor_knowni_alloc[MAX_READLENGTH+1], acceptor_knowni_alloc[MAX_READLENGTH+1]; | |
1346 | donor_positions_alloc = (int *) MALLOC((querylength+1)*sizeof(int)); | |
1347 | acceptor_positions_alloc = (int *) MALLOC((querylength+1)*sizeof(int)); | |
1348 | donor_knowni_alloc = (int *) MALLOC((querylength+1)*sizeof(int)); | |
1349 | acceptor_knowni_alloc = (int *) MALLOC((querylength+1)*sizeof(int)); | |
1246 | 1350 | #endif |
1247 | 1351 | |
1248 | 1352 | debug1(printf("Splice_solve_single: Getting genome at lefti %u and leftj %u (diff: %d)\n", |
1545 | 1649 | |
1546 | 1650 | if (sufficient1p && sufficient2p) { |
1547 | 1651 | *nhits += 1; |
1548 | return List_push(hits,(void *) Stage3end_new_splice(&(*found_score),best_segmenti_nmismatches,best_segmentj_nmismatches, | |
1652 | hits = List_push(hits,(void *) Stage3end_new_splice(&(*found_score),best_segmenti_nmismatches,best_segmentj_nmismatches, | |
1549 | 1653 | donor,acceptor,best_donor_prob,best_acceptor_prob, |
1550 | 1654 | /*distance*/segmentj_left - segmenti_left, |
1551 | 1655 | /*shortdistancep*/true,splicing_penalty,querylength, |
1555 | 1659 | /*amb_probs_donor*/NULL,/*amb_probs_acceptor*/NULL, |
1556 | 1660 | /*copy_donor_p*/false,/*copy_acceptor_p*/false,first_read_p,sensedir, |
1557 | 1661 | sarrayp)); |
1662 | /* return hits; */ | |
1558 | 1663 | } else if (subs_or_indels_p == true) { |
1559 | 1664 | if (donor != NULL) Substring_free(&donor); |
1560 | 1665 | if (acceptor != NULL) Substring_free(&acceptor); |
1561 | return hits; | |
1666 | /* return hits; */ | |
1562 | 1667 | } else if (donor_support < LOWPROB_SUPPORT || acceptor_support < LOWPROB_SUPPORT) { |
1563 | 1668 | if (donor != NULL) Substring_free(&donor); |
1564 | 1669 | if (acceptor != NULL) Substring_free(&acceptor); |
1565 | return hits; | |
1670 | /* return hits; */ | |
1566 | 1671 | } else if (sufficient1p || sufficient2p) { |
1567 | 1672 | *lowprob = List_push(*lowprob, |
1568 | 1673 | (void *) Stage3end_new_splice(&(*found_score),best_segmenti_nmismatches,best_segmentj_nmismatches, |
1575 | 1680 | /*amb_probs_donor*/NULL,/*amb_probs_acceptor*/NULL, |
1576 | 1681 | /*copy_donor_p*/false,/*copy_acceptor_p*/false,first_read_p,sensedir, |
1577 | 1682 | sarrayp)); |
1578 | return hits; | |
1683 | /* return hits; */ | |
1579 | 1684 | } else { |
1580 | 1685 | if (donor != NULL) Substring_free(&donor); |
1581 | 1686 | if (acceptor != NULL) Substring_free(&acceptor); |
1687 | /* ? return hits; */ | |
1582 | 1688 | } |
1583 | 1689 | } |
1584 | 1690 | |
1615 | 1721 | sufficient2p = sufficient_splice_prob_local(donor_support,best_segmentj_nmismatches,best_donor_prob); |
1616 | 1722 | if (sufficient1p && sufficient2p) { |
1617 | 1723 | *nhits += 1; |
1618 | return List_push(hits,(void *) Stage3end_new_splice(&(*found_score),best_segmentj_nmismatches,best_segmenti_nmismatches, | |
1724 | hits = List_push(hits,(void *) Stage3end_new_splice(&(*found_score),best_segmentj_nmismatches,best_segmenti_nmismatches, | |
1619 | 1725 | donor,acceptor,best_donor_prob,best_acceptor_prob, |
1620 | 1726 | /*distance*/segmentj_left - segmenti_left, |
1621 | 1727 | /*shortdistancep*/true,splicing_penalty,querylength, |
1625 | 1731 | /*amb_probs_donor*/NULL,/*amb_probs_acceptor*/NULL, |
1626 | 1732 | /*copy_donor_p*/false,/*copy_acceptor_p*/false,first_read_p,sensedir, |
1627 | 1733 | sarrayp)); |
1734 | /* return hits; */ | |
1628 | 1735 | } else if (subs_or_indels_p == true) { |
1629 | 1736 | if (donor != NULL) Substring_free(&donor); |
1630 | 1737 | if (acceptor != NULL) Substring_free(&acceptor); |
1631 | return hits; | |
1738 | /* return hits; */ | |
1632 | 1739 | } else if (donor_support < LOWPROB_SUPPORT || acceptor_support < LOWPROB_SUPPORT) { |
1633 | 1740 | if (donor != NULL) Substring_free(&donor); |
1634 | 1741 | if (acceptor != NULL) Substring_free(&acceptor); |
1635 | return hits; | |
1742 | /* return hits; */ | |
1636 | 1743 | } else if (sufficient1p || sufficient2p) { |
1637 | 1744 | *lowprob = List_push(*lowprob, |
1638 | 1745 | (void *) Stage3end_new_splice(&(*found_score),best_segmentj_nmismatches,best_segmenti_nmismatches, |
1645 | 1752 | /*amb_probs_donor*/NULL,/*amb_probs_acceptor*/NULL, |
1646 | 1753 | /*copy_donor_p*/false,/*copy_acceptor_p*/false,first_read_p,sensedir, |
1647 | 1754 | sarrayp)); |
1648 | return hits; | |
1755 | /* return hits; */ | |
1649 | 1756 | } else { |
1650 | 1757 | if (donor != NULL) Substring_free(&donor); |
1651 | 1758 | if (acceptor != NULL) Substring_free(&acceptor); |
1652 | return hits; | |
1653 | } | |
1654 | } | |
1655 | } | |
1656 | } | |
1657 | } | |
1658 | ||
1659 | debug1(printf("Splice_solve_single_antisense fail\n")); | |
1759 | /* ? return hits; */ | |
1760 | } | |
1761 | } | |
1762 | } | |
1763 | } | |
1764 | } | |
1765 | ||
1766 | #ifdef HAVE_ALLOCA | |
1767 | if (querylength <= MAX_STACK_READLENGTH) { | |
1768 | FREEA(donor_positions_alloc); | |
1769 | FREEA(acceptor_positions_alloc); | |
1770 | FREEA(donor_knowni_alloc); | |
1771 | FREEA(acceptor_knowni_alloc); | |
1772 | } else { | |
1773 | FREE(donor_positions_alloc); | |
1774 | FREE(acceptor_positions_alloc); | |
1775 | FREE(donor_knowni_alloc); | |
1776 | FREE(acceptor_knowni_alloc); | |
1777 | } | |
1778 | #else | |
1779 | FREE(donor_positions_alloc); | |
1780 | FREE(acceptor_positions_alloc); | |
1781 | FREE(donor_knowni_alloc); | |
1782 | FREE(acceptor_knowni_alloc); | |
1783 | #endif | |
1784 | ||
1660 | 1785 | return hits; |
1661 | 1786 | } |
1662 | 1787 | |
1663 | ||
1664 | #if 0 | |
1665 | List_T | |
1666 | Splice_solve_double (int *found_score, int *nhits, List_T hits, List_T *lowprob, | |
1667 | ||
1668 | bool *segmenti_usedp, bool *segmentm_usedp, bool *segmentj_usedp, | |
1669 | Univcoord_T segmenti_left, Univcoord_T segmentm_left, Univcoord_T segmentj_left, | |
1670 | Chrnum_T segmenti_chrnum, Univcoord_T segmenti_chroffset, | |
1671 | Univcoord_T segmenti_chrhigh, Chrpos_T segmenti_chrlength, | |
1672 | Chrnum_T segmentm_chrnum, Univcoord_T segmentm_chroffset, | |
1673 | Univcoord_T segmentm_chrhigh, Chrpos_T segmentm_chrlength, | |
1674 | Chrnum_T segmentj_chrnum, Univcoord_T segmentj_chroffset, | |
1675 | Univcoord_T segmentj_chrhigh, Chrpos_T segmentj_chrlength, | |
1676 | ||
1677 | int querylength, Compress_T query_compress, | |
1678 | int *segmenti_donor_knownpos, int *segmentm_acceptor_knownpos, int *segmentm_donor_knownpos, int *segmentj_acceptor_knownpos, | |
1679 | int *segmentj_antidonor_knownpos, int *segmentm_antiacceptor_knownpos, int *segmentm_antidonor_knownpos, int *segmenti_antiacceptor_knownpos, | |
1680 | int *segmenti_donor_knowni, int *segmentm_acceptor_knowni, int *segmentm_donor_knowni, int *segmentj_acceptor_knowni, | |
1681 | int *segmentj_antidonor_knowni, int *segmentm_antiacceptor_knowni, int *segmentm_antidonor_knowni, int *segmenti_antiacceptor_knowni, | |
1682 | int segmenti_donor_nknown, int segmentm_acceptor_nknown, int segmentm_donor_nknown, int segmentj_acceptor_nknown, | |
1683 | int segmentj_antidonor_nknown, int segmentm_antiacceptor_nknown, int segmentm_antidonor_nknown, int segmenti_antiacceptor_nknown, | |
1684 | int splicing_penalty, int max_mismatches_allowed, bool plusp, int genestrand, | |
1685 | bool subs_or_indels_p, bool sarrayp) { | |
1686 | Substring_T donor, shortexon, acceptor; | |
1687 | int best_splice_pos_1, best_splice_pos_2, splice_pos_start, splice_pos_end, splice_pos_1, splice_pos_2; | |
1688 | int i, a, b, j; | |
1689 | ||
1690 | int best_nmismatches, nmismatches; | |
1691 | int best_segmenti_nmismatches, best_segmentm_nmismatches, best_segmentj_nmismatches, | |
1692 | segmenti_nmismatches, segmentm_nmismatches, segmentj_nmismatches; | |
1693 | int donor_support, acceptor_support, middle_support; | |
1694 | Univcoord_T best_donor1_splicecoord, best_acceptor1_splicecoord, best_donor2_splicecoord, best_acceptor2_splicecoord; | |
1695 | int best_donor1_knowni, best_acceptor1_knowni, best_donor2_knowni, best_acceptor2_knowni; | |
1696 | double best_prob, best_donor1_prob, best_acceptor1_prob, best_donor2_prob, best_acceptor2_prob, | |
1697 | probi, proba, probb, probj; | |
1698 | bool sufficient1p, sufficient2p, sufficient3p, sufficient4p, orig_plusp, matchp; | |
1699 | int sensedir; | |
1700 | ||
1701 | int donori_nsites, acceptora_nsites, donorb_nsites, acceptorj_nsites, | |
1702 | antiacceptori_nsites, antidonora_nsites, antiacceptorb_nsites, antidonorj_nsites; | |
1703 | int *donori_positions, *acceptora_positions, *donorb_positions, *acceptorj_positions, | |
1704 | *antiacceptori_positions, *antidonora_positions, *antiacceptorb_positions, *antidonorj_positions; | |
1705 | int *donori_knowni, *acceptora_knowni, *donorb_knowni, *acceptorj_knowni, | |
1706 | *antiacceptori_knowni, *antidonora_knowni, *antiacceptorb_knowni, *antidonorj_knowni; | |
1707 | ||
1708 | #ifdef HAVE_ALLOCA | |
1709 | int *donor1_positions_alloc = (int *) alloca((querylength+1)*sizeof(int)); | |
1710 | int *acceptor1_positions_alloc = (int *) alloca((querylength+1)*sizeof(int)); | |
1711 | int *donor2_positions_alloc = (int *) alloca((querylength+1)*sizeof(int)); | |
1712 | int *acceptor2_positions_alloc = (int *) alloca((querylength+1)*sizeof(int)); | |
1713 | int *donor1_knowni_alloc = (int *) alloca((querylength+1)*sizeof(int)); | |
1714 | int *acceptor1_knowni_alloc = (int *) alloca((querylength+1)*sizeof(int)); | |
1715 | int *donor2_knowni_alloc = (int *) alloca((querylength+1)*sizeof(int)); | |
1716 | int *acceptor2_knowni_alloc = (int *) alloca((querylength+1)*sizeof(int)); | |
1717 | #else | |
1718 | int donor1_positions_alloc[MAX_READLENGTH+1], acceptor1_positions_alloc[MAX_READLENGTH+1], | |
1719 | donor2_positions_alloc[MAX_READLENGTH+1], acceptor2_positions_alloc[MAX_READLENGTH+1]; | |
1720 | int donor1_knowni_alloc[MAX_READLENGTH+1], acceptor1_knowni_alloc[MAX_READLENGTH+1], | |
1721 | donor2_knowni_alloc[MAX_READLENGTH+1], acceptor2_knowni_alloc[MAX_READLENGTH+1]; | |
1722 | #endif | |
1723 | ||
1724 | ||
1725 | debug2(printf("Splice_solve_double: Getting genome at lefti %u, leftm %u, and leftj %u\n", | |
1726 | segmenti_left,segmentm_left,segmentj_left)); | |
1727 | ||
1728 | *nhits = 0; | |
1729 | splice_pos_start = min_shortend; | |
1730 | splice_pos_end = querylength - min_shortend; /* ? off by 1, so -l 3 allows only ends of up to 2 */ | |
1731 | ||
1732 | if (splice_pos_start <= splice_pos_end) { | |
1733 | /* Originally from plus strand. No complement. */ | |
1734 | /* Sense (End 1 to End 2) or Antisense (End 5 to End 6) */ | |
1735 | ||
1736 | /* Segment i */ | |
1737 | if (novelsplicingp && segmenti_left + splice_pos_start >= DONOR_MODEL_LEFT_MARGIN) { | |
1738 | donori_nsites = Genome_donor_positions(donor1_positions_alloc,donor1_knowni_alloc, | |
1739 | segmenti_donor_knownpos,segmenti_donor_knowni, | |
1740 | segmenti_left,splice_pos_start,splice_pos_end); | |
1741 | donori_positions = donor1_positions_alloc; | |
1742 | donori_knowni = donor1_knowni_alloc; | |
1743 | } else { | |
1744 | donori_nsites = segmenti_donor_nknown; | |
1745 | donori_positions = segmenti_donor_knownpos; | |
1746 | donori_knowni = segmenti_donor_knowni; | |
1747 | } | |
1748 | ||
1749 | #ifdef DEBUG2 | |
1750 | printf("Found %d donori sites:",donori_nsites); | |
1751 | for (i = 0; i < donori_nsites; i++) { | |
1752 | printf(" %d",donori_positions[i]); | |
1753 | if (donori_knowni[i] >= 0) { | |
1754 | printf(" (%d)",donori_knowni[i]); | |
1755 | } | |
1756 | } | |
1757 | printf("\n"); | |
1758 | #endif | |
1759 | ||
1760 | /* Segment m1 */ | |
1761 | if (novelsplicingp && segmentm_left + splice_pos_start >= ACCEPTOR_MODEL_LEFT_MARGIN) { | |
1762 | acceptora_nsites = Genome_acceptor_positions(acceptor1_positions_alloc,acceptor1_knowni_alloc, | |
1763 | segmentm_acceptor_knownpos,segmentm_acceptor_knowni, | |
1764 | segmentm_left,splice_pos_start,splice_pos_end); | |
1765 | acceptora_positions = acceptor1_positions_alloc; | |
1766 | acceptora_knowni = acceptor1_knowni_alloc; | |
1767 | } else { | |
1768 | acceptora_nsites = segmentm_acceptor_nknown; | |
1769 | acceptora_positions = segmentm_acceptor_knownpos; | |
1770 | acceptora_knowni = segmentm_acceptor_knowni; | |
1771 | } | |
1772 | ||
1773 | #ifdef DEBUG2 | |
1774 | printf("Found %d acceptora sites:",acceptora_nsites); | |
1775 | for (i = 0; i < acceptora_nsites; i++) { | |
1776 | printf(" %d",acceptora_positions[i]); | |
1777 | if (acceptora_knowni[i] >= 0) { | |
1778 | printf(" (%d)",acceptora_knowni[i]); | |
1779 | } | |
1780 | } | |
1781 | printf("\n"); | |
1782 | #endif | |
1783 | ||
1784 | /* Segment m2 */ | |
1785 | if (novelsplicingp && segmentm_left + splice_pos_start >= DONOR_MODEL_LEFT_MARGIN) { | |
1786 | donorb_nsites = Genome_donor_positions(donor2_positions_alloc,donor2_knowni_alloc, | |
1787 | segmentm_donor_knownpos,segmentm_donor_knowni, | |
1788 | segmentm_left,splice_pos_start,splice_pos_end); | |
1789 | donorb_positions = donor2_positions_alloc; | |
1790 | donorb_knowni = donor2_knowni_alloc; | |
1791 | } else { | |
1792 | donorb_nsites = segmentm_donor_nknown; | |
1793 | donorb_positions = segmentm_donor_knownpos; | |
1794 | donorb_knowni = segmentm_donor_knowni; | |
1795 | } | |
1796 | ||
1797 | #ifdef DEBUG2 | |
1798 | printf("Found %d donorb sites:",donorb_nsites); | |
1799 | for (i = 0; i < donorb_nsites; i++) { | |
1800 | printf(" %d",donorb_positions[i]); | |
1801 | if (donorb_knowni[i] >= 0) { | |
1802 | printf(" (%d)",donorb_knowni[i]); | |
1803 | } | |
1804 | } | |
1805 | printf("\n"); | |
1806 | #endif | |
1807 | ||
1808 | /* Segment j */ | |
1809 | if (novelsplicingp && segmentj_left + splice_pos_start >= ACCEPTOR_MODEL_LEFT_MARGIN) { | |
1810 | acceptorj_nsites = Genome_acceptor_positions(acceptor2_positions_alloc,acceptor2_knowni_alloc, | |
1811 | segmentj_acceptor_knownpos,segmentj_acceptor_knowni, | |
1812 | segmentj_left,splice_pos_start,splice_pos_end); | |
1813 | acceptorj_positions = acceptor2_positions_alloc; | |
1814 | acceptorj_knowni = acceptor2_knowni_alloc; | |
1815 | } else { | |
1816 | acceptorj_nsites = segmentj_acceptor_nknown; | |
1817 | acceptorj_positions = segmentj_acceptor_knownpos; | |
1818 | acceptorj_knowni = segmentj_acceptor_knowni; | |
1819 | } | |
1820 | ||
1821 | #ifdef DEBUG2 | |
1822 | printf("Found %d acceptorj sites:",acceptorj_nsites); | |
1823 | for (i = 0; i < acceptorj_nsites; i++) { | |
1824 | printf(" %d",acceptorj_positions[i]); | |
1825 | if (acceptorj_knowni[i] >= 0) { | |
1826 | printf(" (%d)",acceptorj_knowni[i]); | |
1827 | } | |
1828 | } | |
1829 | printf("\n"); | |
1830 | #endif | |
1831 | ||
1832 | best_nmismatches = max_mismatches_allowed; | |
1833 | best_prob = 0.0; | |
1834 | orig_plusp = true; | |
1835 | ||
1836 | i = a = b = j = 0; | |
1837 | while (i < donori_nsites && a < acceptora_nsites) { | |
1838 | if ((splice_pos_1 = donori_positions[i]) < acceptora_positions[a]) { | |
1839 | i++; | |
1840 | } else if (splice_pos_1 > acceptora_positions[a]) { | |
1841 | a++; | |
1842 | } else { | |
1843 | while (b < donorb_nsites && donorb_positions[b] <= splice_pos_1) { | |
1844 | b++; | |
1845 | } | |
1846 | while (j < acceptorj_nsites && acceptorj_positions[j] <= splice_pos_1) { | |
1847 | j++; | |
1848 | } | |
1849 | matchp = false; | |
1850 | while (b < donorb_nsites && j < acceptorj_nsites && matchp == false) { | |
1851 | if ((splice_pos_2 = donorb_positions[b]) < acceptorj_positions[j]) { | |
1852 | b++; | |
1853 | } else if (splice_pos_2 > acceptorj_positions[j]) { | |
1854 | j++; | |
1855 | } else { | |
1856 | segmenti_nmismatches = Genome_count_mismatches_substring(query_compress,/*left*/segmenti_left,/*pos5*/0,/*pos3*/splice_pos_1, | |
1857 | plusp,genestrand); | |
1858 | segmentm_nmismatches = Genome_count_mismatches_substring(query_compress,/*left*/segmentm_left,/*pos5*/splice_pos_1,/*pos3*/splice_pos_2, | |
1859 | plusp,genestrand); | |
1860 | segmentj_nmismatches = Genome_count_mismatches_substring(query_compress,/*left*/segmentj_left,/*pos5*/splice_pos_2,/*pos3*/querylength, | |
1861 | plusp,genestrand); | |
1862 | if ((nmismatches = segmenti_nmismatches + segmentm_nmismatches + segmentj_nmismatches) <= best_nmismatches) { | |
1863 | if (donori_knowni[i] >= 0) { | |
1864 | probi = 1.0; /* Needs to be 1.0 for output */ | |
1865 | } else { | |
1866 | probi = Maxent_hr_donor_prob(segmenti_left + splice_pos_1,segmenti_chroffset); | |
1867 | } | |
1868 | ||
1869 | if (acceptora_knowni[a] >= 0) { | |
1870 | proba = 1.0; /* Needs to be 1.0 for output */ | |
1871 | } else { | |
1872 | proba = Maxent_hr_acceptor_prob(segmentm_left + splice_pos_1,segmentm_chroffset); | |
1873 | } | |
1874 | ||
1875 | if (donorb_knowni[b] >= 0) { | |
1876 | probb = 1.0; /* Needs to be 1.0 for output */ | |
1877 | } else { | |
1878 | probb = Maxent_hr_donor_prob(segmentm_left + splice_pos_2,segmentm_chroffset); | |
1879 | } | |
1880 | ||
1881 | if (acceptorj_knowni[j] >= 0) { | |
1882 | probj = 1.0; /* Needs to be 1.0 for output */ | |
1883 | } else { | |
1884 | probj = Maxent_hr_acceptor_prob(segmentj_left + splice_pos_2,segmentj_chroffset); | |
1885 | } | |
1886 | ||
1887 | debug2( | |
1888 | if (plusp == true) { | |
1889 | printf("plus sense splice_pos %d, %d, i.donor %f, m.acceptor %f, m.donor %f, j.acceptor %f\n", | |
1890 | splice_pos_1,splice_pos_2,probi,proba,probb,probj); | |
1891 | } else { | |
1892 | printf("minus antisense splice_pos %d %d, i.donor %f, m.acceptor %f, m.donor %f, j.acceptor %f\n", | |
1893 | splice_pos_1,splice_pos_2,probi,proba,probb,probj); | |
1894 | }); | |
1895 | ||
1896 | if (nmismatches < best_nmismatches || | |
1897 | (nmismatches == best_nmismatches && probi + proba + probb + probj > best_prob)) { | |
1898 | /* Success */ | |
1899 | best_nmismatches = nmismatches; | |
1900 | best_prob = probi + proba + probb + probj; | |
1901 | ||
1902 | best_donor1_splicecoord = segmenti_left + splice_pos_1; | |
1903 | best_acceptor1_splicecoord = segmentm_left + splice_pos_1; | |
1904 | best_donor2_splicecoord = segmentm_left + splice_pos_2; | |
1905 | best_acceptor2_splicecoord = segmentj_left + splice_pos_2; | |
1906 | best_donor1_knowni = donori_knowni[i]; | |
1907 | best_acceptor1_knowni = acceptora_knowni[a]; | |
1908 | best_donor2_knowni = donorb_knowni[b]; | |
1909 | best_acceptor2_knowni = acceptorj_knowni[j]; | |
1910 | best_donor1_prob = probi; | |
1911 | best_acceptor1_prob = proba; | |
1912 | best_donor2_prob = probb; | |
1913 | best_acceptor2_prob = probj; | |
1914 | best_splice_pos_1 = splice_pos_1; | |
1915 | best_splice_pos_2 = splice_pos_2; | |
1916 | best_segmenti_nmismatches = segmenti_nmismatches; | |
1917 | best_segmentm_nmismatches = segmentm_nmismatches; | |
1918 | best_segmentj_nmismatches = segmentj_nmismatches; | |
1919 | } | |
1920 | } | |
1921 | /* b++; j++; Don't advance b or j, so next i/a can match */ | |
1922 | matchp = true; | |
1923 | } | |
1924 | } | |
1925 | i++; | |
1926 | a++; | |
1927 | } | |
1928 | } | |
1929 | ||
1930 | ||
1931 | /* Originally from minus strand. Complement. */ | |
1932 | /* Antisense (End 7 to End 8) or Sense (End 3 to End 4) */ | |
1933 | ||
1934 | /* Segment i */ | |
1935 | if (novelsplicingp && segmenti_left + splice_pos_start >= ACCEPTOR_MODEL_RIGHT_MARGIN) { | |
1936 | antiacceptori_nsites = Genome_antiacceptor_positions(acceptor1_positions_alloc,acceptor1_knowni_alloc, | |
1937 | segmenti_antiacceptor_knownpos,segmenti_antiacceptor_knowni, | |
1938 | segmenti_left,splice_pos_start,splice_pos_end); | |
1939 | antiacceptori_positions = acceptor1_positions_alloc; | |
1940 | antiacceptori_knowni = acceptor1_knowni_alloc; | |
1941 | } else { | |
1942 | antiacceptori_nsites = segmenti_antiacceptor_nknown; | |
1943 | antiacceptori_positions = segmenti_antiacceptor_knownpos; | |
1944 | antiacceptori_knowni = segmenti_antiacceptor_knowni; | |
1945 | } | |
1946 | ||
1947 | #ifdef DEBUG2 | |
1948 | printf("Found %d antiacceptori sites:",antiacceptori_nsites); | |
1949 | for (i = 0; i < antiacceptori_nsites; i++) { | |
1950 | printf(" %d",antiacceptori_positions[i]); | |
1951 | if (antiacceptori_knowni[i] >= 0) { | |
1952 | printf(" (%d)",antiacceptori_knowni[i]); | |
1953 | } | |
1954 | } | |
1955 | printf("\n"); | |
1956 | #endif | |
1957 | ||
1958 | /* Segment m1 */ | |
1959 | if (novelsplicingp && segmentm_left + splice_pos_start >= DONOR_MODEL_RIGHT_MARGIN) { | |
1960 | antidonora_nsites = Genome_antidonor_positions(donor1_positions_alloc,donor1_knowni_alloc, | |
1961 | segmentm_antidonor_knownpos,segmentm_antidonor_knowni, | |
1962 | segmentm_left,splice_pos_start,splice_pos_end); | |
1963 | antidonora_positions = donor1_positions_alloc; | |
1964 | antidonora_knowni = donor1_knowni_alloc; | |
1965 | } else { | |
1966 | antidonora_nsites = segmentm_antidonor_nknown; | |
1967 | antidonora_positions = segmentm_antidonor_knownpos; | |
1968 | antidonora_knowni = segmentm_antidonor_knowni; | |
1969 | } | |
1970 | ||
1971 | #ifdef DEBUG2 | |
1972 | printf("Found %d antidonora sites:",antidonora_nsites); | |
1973 | for (i = 0; i < antidonora_nsites; i++) { | |
1974 | printf(" %d",antidonora_positions[i]); | |
1975 | if (antidonora_knowni[i] >= 0) { | |
1976 | printf(" (%d)",antidonora_knowni[i]); | |
1977 | } | |
1978 | } | |
1979 | printf("\n"); | |
1980 | #endif | |
1981 | ||
1982 | /* Segment m2 */ | |
1983 | if (novelsplicingp && segmentm_left + splice_pos_start >= ACCEPTOR_MODEL_RIGHT_MARGIN) { | |
1984 | antiacceptorb_nsites = Genome_antiacceptor_positions(acceptor2_positions_alloc,acceptor2_knowni_alloc, | |
1985 | segmentm_antiacceptor_knownpos,segmentm_antiacceptor_knowni, | |
1986 | segmentm_left,splice_pos_start,splice_pos_end); | |
1987 | antiacceptorb_positions = acceptor2_positions_alloc; | |
1988 | antiacceptorb_knowni = acceptor2_knowni_alloc; | |
1989 | } else { | |
1990 | antiacceptorb_nsites = segmentm_antiacceptor_nknown; | |
1991 | antiacceptorb_positions = segmentm_antiacceptor_knownpos; | |
1992 | antiacceptorb_knowni = segmentm_antiacceptor_knowni; | |
1993 | } | |
1994 | ||
1995 | #ifdef DEBUG2 | |
1996 | printf("Found %d antiacceptorb sites:",antiacceptorb_nsites); | |
1997 | for (i = 0; i < antiacceptorb_nsites; i++) { | |
1998 | printf(" %d",antiacceptorb_positions[i]); | |
1999 | if (antiacceptorb_knowni[i] >= 0) { | |
2000 | printf(" (%d)",antiacceptorb_knowni[i]); | |
2001 | } | |
2002 | } | |
2003 | printf("\n"); | |
2004 | #endif | |
2005 | ||
2006 | /* Segment j */ | |
2007 | if (novelsplicingp && segmentj_left + splice_pos_start >= DONOR_MODEL_RIGHT_MARGIN) { | |
2008 | antidonorj_nsites = Genome_antidonor_positions(donor2_positions_alloc,donor2_knowni_alloc, | |
2009 | segmentj_antidonor_knownpos,segmentj_antidonor_knowni, | |
2010 | segmentj_left,splice_pos_start,splice_pos_end); | |
2011 | antidonorj_positions = donor2_positions_alloc; | |
2012 | antidonorj_knowni = donor2_knowni_alloc; | |
2013 | } else { | |
2014 | antidonorj_nsites = segmentj_antidonor_nknown; | |
2015 | antidonorj_positions = segmentj_antidonor_knownpos; | |
2016 | antidonorj_knowni = segmentj_antidonor_knowni; | |
2017 | } | |
2018 | ||
2019 | #ifdef DEBUG2 | |
2020 | printf("Found %d antidonorj sites:",antidonorj_nsites); | |
2021 | for (i = 0; i < antidonorj_nsites; i++) { | |
2022 | printf(" %d",antidonorj_positions[i]); | |
2023 | if (antidonorj_knowni[i] >= 0) { | |
2024 | printf(" (%d)",antidonorj_knowni[i]); | |
2025 | } | |
2026 | } | |
2027 | printf("\n"); | |
2028 | #endif | |
2029 | ||
2030 | ||
2031 | i = a = b = j = 0; | |
2032 | while (i < antiacceptori_nsites && a < antidonora_nsites) { | |
2033 | if ((splice_pos_1 = antiacceptori_positions[i]) < antidonora_positions[a]) { | |
2034 | i++; | |
2035 | } else if (splice_pos_1 > antidonora_positions[a]) { | |
2036 | a++; | |
2037 | } else { | |
2038 | while (b < antiacceptorb_nsites && antiacceptorb_positions[b] <= splice_pos_1) { | |
2039 | b++; | |
2040 | } | |
2041 | while (j < antidonorj_nsites && antidonorj_positions[j] <= splice_pos_1) { | |
2042 | j++; | |
2043 | } | |
2044 | matchp = false; | |
2045 | while (b < antiacceptorb_nsites && j < antidonorj_nsites && matchp == false) { | |
2046 | if ((splice_pos_2 = antiacceptorb_positions[b]) < antidonorj_positions[j]) { | |
2047 | b++; | |
2048 | } else if (splice_pos_2 > antidonorj_positions[j]) { | |
2049 | j++; | |
2050 | } else { | |
2051 | segmenti_nmismatches = Genome_count_mismatches_substring(query_compress,/*left*/segmenti_left,/*pos5*/0,/*pos3*/splice_pos_1, | |
2052 | plusp,genestrand); | |
2053 | segmentm_nmismatches = Genome_count_mismatches_substring(query_compress,/*left*/segmentm_left,/*pos5*/splice_pos_1,/*pos3*/splice_pos_2, | |
2054 | plusp,genestrand); | |
2055 | segmentj_nmismatches = Genome_count_mismatches_substring(query_compress,/*left*/segmentj_left,/*pos5*/splice_pos_2,/*pos3*/querylength, | |
2056 | plusp,genestrand); | |
2057 | ||
2058 | if ((nmismatches = segmenti_nmismatches + segmentm_nmismatches + segmentj_nmismatches) <= best_nmismatches) { | |
2059 | if (antiacceptori_knowni[i] >= 0) { | |
2060 | probi = 1.0; /* Needs to be 1.0 for output */ | |
2061 | } else { | |
2062 | probi = Maxent_hr_antiacceptor_prob(segmenti_left + splice_pos_1,segmenti_chroffset); | |
2063 | } | |
2064 | ||
2065 | if (antidonora_knowni[a] >= 0) { | |
2066 | proba = 1.0; /* Needs to be 1.0 for output */ | |
2067 | } else { | |
2068 | proba = Maxent_hr_antidonor_prob(segmentm_left + splice_pos_1,segmentm_chroffset); | |
2069 | } | |
2070 | ||
2071 | if (antiacceptorb_knowni[b] >= 0) { | |
2072 | probb = 1.0; /* Needs to be 1.0 for output */ | |
2073 | } else { | |
2074 | probb = Maxent_hr_antiacceptor_prob(segmentm_left + splice_pos_2,segmentm_chroffset); | |
2075 | } | |
2076 | ||
2077 | if (antidonorj_knowni[j] >= 0) { | |
2078 | probj = 1.0; /* Needs to be 1.0 for output */ | |
2079 | } else { | |
2080 | probj = Maxent_hr_antidonor_prob(segmentj_left + splice_pos_2,segmentj_chroffset); | |
2081 | } | |
2082 | ||
2083 | debug2( | |
2084 | if (plusp == true) { | |
2085 | printf("plus antisense splice_pos %d, %d, i.antiacceptor %f, m.antidonor %f, m.antiacceptor %f, j.antidonor %f\n", | |
2086 | splice_pos_1,splice_pos_2,probi,proba,probb,probj); | |
2087 | } else { | |
2088 | printf("minus sense splice_pos %d, %d, i.antiacceptor %f, m.antidonor %f, m.antiacceptor %f, j.antidonor %f\n", | |
2089 | splice_pos_1,splice_pos_2,probi,proba,probb,probj); | |
2090 | }); | |
2091 | ||
2092 | if (nmismatches < best_nmismatches || | |
2093 | (nmismatches == best_nmismatches && probi + proba + probb + probj > best_prob)) { | |
2094 | /* Success */ | |
2095 | best_nmismatches = nmismatches; | |
2096 | best_prob = probi + proba + probb + probj; | |
2097 | ||
2098 | best_acceptor1_splicecoord = segmenti_left + splice_pos_1; | |
2099 | best_donor1_splicecoord = segmentm_left + splice_pos_1; | |
2100 | best_acceptor2_splicecoord = segmentm_left + splice_pos_2; | |
2101 | best_donor2_splicecoord = segmentj_left + splice_pos_2; | |
2102 | best_acceptor1_knowni = antiacceptori_knowni[i]; | |
2103 | best_donor1_knowni = antidonora_knowni[a]; | |
2104 | best_acceptor2_knowni = antiacceptorb_knowni[b]; | |
2105 | best_donor2_knowni = antidonorj_knowni[j]; | |
2106 | best_acceptor1_prob = probi; | |
2107 | best_donor1_prob = proba; | |
2108 | best_acceptor2_prob = probb; | |
2109 | best_donor2_prob = probj; | |
2110 | best_splice_pos_1 = splice_pos_1; | |
2111 | best_splice_pos_2 = splice_pos_2; | |
2112 | best_segmenti_nmismatches = segmenti_nmismatches; | |
2113 | best_segmentm_nmismatches = segmentm_nmismatches; | |
2114 | best_segmentj_nmismatches = segmentj_nmismatches; | |
2115 | orig_plusp = false; | |
2116 | } | |
2117 | } | |
2118 | /* b++; j++; Don't advance b or j, so next i/a can match */ | |
2119 | matchp = true; | |
2120 | } | |
2121 | } | |
2122 | i++; | |
2123 | a++; | |
2124 | } | |
2125 | } | |
2126 | ||
2127 | ||
2128 | if (best_prob > 0.0) { | |
2129 | debug2(printf("best_prob = %f at splice_pos %d and %d\n",best_prob,best_splice_pos_1,best_splice_pos_2)); | |
2130 | if (orig_plusp == true) { | |
2131 | /* Originally from plus strand. No complement. */ | |
2132 | sensedir = (plusp == true) ? SENSE_FORWARD : SENSE_ANTI; | |
2133 | ||
2134 | donor = Substring_new_donor(best_donor1_splicecoord,best_donor1_knowni, | |
2135 | best_splice_pos_1,/*substring_querystart*/0,/*substring_queryend*/querylength, | |
2136 | best_segmenti_nmismatches, | |
2137 | best_donor1_prob,/*left*/segmenti_left,query_compress, | |
2138 | querylength,plusp,genestrand,sensedir, | |
2139 | segmenti_chrnum,segmenti_chroffset,segmenti_chrhigh,segmenti_chrlength); | |
2140 | ||
2141 | shortexon = Substring_new_shortexon(best_acceptor1_splicecoord,best_acceptor1_knowni, | |
2142 | best_donor2_splicecoord,best_donor2_knowni, | |
2143 | /*acceptor_pos*/best_splice_pos_1,/*donor_pos*/best_splice_pos_2,best_segmentm_nmismatches, | |
2144 | /*acceptor_prob*/best_acceptor1_prob,/*donor_prob*/best_donor2_prob, | |
2145 | /*left*/segmentm_left,query_compress, | |
2146 | querylength,plusp,genestrand, | |
2147 | sensedir,/*acceptor_ambp*/false,/*donor_ambp*/false, | |
2148 | segmentm_chrnum,segmentm_chroffset,segmentm_chrhigh,segmentm_chrlength); | |
2149 | ||
2150 | acceptor = Substring_new_acceptor(best_acceptor2_splicecoord,best_acceptor2_knowni, | |
2151 | best_splice_pos_2,/*substring_querystart*/0,/*substring_queryend*/querylength, | |
2152 | best_segmentj_nmismatches, | |
2153 | best_acceptor2_prob,/*left*/segmentj_left,query_compress, | |
2154 | querylength,plusp,genestrand,sensedir, | |
2155 | segmentj_chrnum,segmentj_chroffset,segmentj_chrhigh,segmentj_chrlength); | |
2156 | ||
2157 | if (donor == NULL || shortexon == NULL || acceptor == NULL) { | |
2158 | if (donor != NULL) Substring_free(&donor); | |
2159 | if (shortexon != NULL) Substring_free(&shortexon); | |
2160 | if (acceptor != NULL) Substring_free(&acceptor); | |
2161 | } else { | |
2162 | *segmenti_usedp = *segmentm_usedp = *segmentj_usedp = true; | |
2163 | ||
2164 | donor_support = best_splice_pos_1; | |
2165 | middle_support = best_splice_pos_2 - best_splice_pos_1; | |
2166 | acceptor_support = querylength - best_splice_pos_2; | |
2167 | sufficient1p = sufficient_splice_prob_local(donor_support,best_segmenti_nmismatches,best_donor1_prob); | |
2168 | sufficient2p = sufficient_splice_prob_local(middle_support,best_segmentm_nmismatches,best_acceptor1_prob); | |
2169 | sufficient3p = sufficient_splice_prob_local(middle_support,best_segmentm_nmismatches,best_donor2_prob); | |
2170 | sufficient4p = sufficient_splice_prob_local(acceptor_support,best_segmentj_nmismatches,best_acceptor2_prob); | |
2171 | if (sufficient1p && sufficient2p && sufficient3p && sufficient4p) { | |
2172 | *nhits += 1; | |
2173 | hits = List_push(hits,(void *) Stage3end_new_shortexon(&(*found_score),donor,acceptor,shortexon, | |
2174 | best_donor1_prob,/*shortexonA_prob*/best_acceptor1_prob, | |
2175 | /*shortexonD_prob*/best_donor2_prob,best_acceptor2_prob, | |
2176 | /*amb_length_donor*/0,/*amb_length_acceptor*/0, | |
2177 | /*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL, | |
2178 | /*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL, | |
2179 | /*amb_nmismatches_donor*/NULL,/*amb_nmismatches_acceptor*/NULL, | |
2180 | /*amb_probs_donor*/NULL,/*amb_probs_acceptor*/NULL, | |
2181 | /*copy_donor_p*/false,/*copy_acceptor_p*/false,/*copy_shortexon_p*/false, | |
2182 | splicing_penalty,querylength,sensedir,sarrayp)); | |
2183 | } else if (subs_or_indels_p == true) { | |
2184 | /* Don't alter hits */ | |
2185 | if (donor != NULL) Substring_free(&donor); | |
2186 | if (shortexon != NULL) Substring_free(&shortexon); | |
2187 | if (acceptor != NULL) Substring_free(&acceptor); | |
2188 | } else if (donor_support < LOWPROB_SUPPORT || acceptor_support < LOWPROB_SUPPORT) { | |
2189 | if (donor != NULL) Substring_free(&donor); | |
2190 | if (shortexon != NULL) Substring_free(&shortexon); | |
2191 | if (acceptor != NULL) Substring_free(&acceptor); | |
2192 | } else if ((sufficient1p || sufficient2p) && (sufficient3p || sufficient4p)) { | |
2193 | *lowprob = List_push(*lowprob, | |
2194 | (void *) Stage3end_new_shortexon(&(*found_score),donor,acceptor,shortexon, | |
2195 | best_donor1_prob,/*shortexonA_prob*/best_acceptor1_prob, | |
2196 | /*shortexonD_prob*/best_donor2_prob,best_acceptor2_prob, | |
2197 | /*amb_length_donor*/0,/*amb_length_acceptor*/0, | |
2198 | /*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL, | |
2199 | /*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL, | |
2200 | /*amb_nmismatches_donor*/NULL,/*amb_nmismatches_acceptor*/NULL, | |
2201 | /*amb_probs_donor*/NULL,/*amb_probs_acceptor*/NULL, | |
2202 | /*copy_donor_p*/false,/*copy_acceptor_p*/false,/*copy_shortexon_p*/false, | |
2203 | splicing_penalty,querylength,sensedir,sarrayp)); | |
2204 | } else { | |
2205 | if (donor != NULL) Substring_free(&donor); | |
2206 | if (shortexon != NULL) Substring_free(&shortexon); | |
2207 | if (acceptor != NULL) Substring_free(&acceptor); | |
2208 | } | |
2209 | } | |
2210 | ||
2211 | } else { | |
2212 | /* Originally from minus strand. Complement. */ | |
2213 | sensedir = (plusp == true) ? SENSE_ANTI : SENSE_FORWARD; | |
2214 | ||
2215 | donor = Substring_new_donor(best_donor2_splicecoord,best_donor2_knowni, | |
2216 | best_splice_pos_2,/*substring_querystart*/0,/*substring_queryend*/querylength, | |
2217 | best_segmentj_nmismatches, | |
2218 | best_donor2_prob,/*left*/segmentj_left,query_compress, | |
2219 | querylength,plusp,genestrand,sensedir, | |
2220 | segmentj_chrnum,segmentj_chroffset,segmentj_chrhigh,segmentj_chrlength); | |
2221 | ||
2222 | shortexon = Substring_new_shortexon(best_acceptor2_splicecoord,best_acceptor2_knowni, | |
2223 | best_donor1_splicecoord,best_donor1_knowni, | |
2224 | /*acceptor_pos*/best_splice_pos_2,/*donor_pos*/best_splice_pos_1,best_segmentm_nmismatches, | |
2225 | /*acceptor_prob*/best_acceptor2_prob,/*donor_prob*/best_donor1_prob, | |
2226 | /*left*/segmentm_left,query_compress,querylength, | |
2227 | plusp,genestrand,sensedir,/*acceptor_ambp*/false,/*donor_ambp*/false, | |
2228 | segmentm_chrnum,segmentm_chroffset,segmentm_chrhigh,segmentm_chrlength); | |
2229 | ||
2230 | acceptor = Substring_new_acceptor(best_acceptor1_splicecoord,best_acceptor1_knowni, | |
2231 | best_splice_pos_1,/*substring_querystart*/0,/*substring_queryend*/querylength, | |
2232 | best_segmenti_nmismatches, | |
2233 | best_acceptor1_prob,/*left*/segmenti_left,query_compress, | |
2234 | querylength,plusp,genestrand,sensedir, | |
2235 | segmenti_chrnum,segmenti_chroffset,segmenti_chrhigh,segmenti_chrlength); | |
2236 | ||
2237 | if (donor == NULL || shortexon == NULL || acceptor == NULL) { | |
2238 | if (donor != NULL) Substring_free(&donor); | |
2239 | if (shortexon != NULL) Substring_free(&shortexon); | |
2240 | if (acceptor != NULL) Substring_free(&acceptor); | |
2241 | } else { | |
2242 | *segmenti_usedp = *segmentm_usedp = *segmentj_usedp = true; | |
2243 | ||
2244 | acceptor_support = best_splice_pos_1; | |
2245 | middle_support = best_splice_pos_2 - best_splice_pos_1; | |
2246 | donor_support = querylength - best_splice_pos_2; | |
2247 | sufficient1p = sufficient_splice_prob_local(acceptor_support,best_segmenti_nmismatches,best_acceptor1_prob); | |
2248 | sufficient2p = sufficient_splice_prob_local(middle_support,best_segmentm_nmismatches,best_donor1_prob); | |
2249 | sufficient3p = sufficient_splice_prob_local(middle_support,best_segmentm_nmismatches,best_acceptor2_prob); | |
2250 | sufficient4p = sufficient_splice_prob_local(donor_support,best_segmentj_nmismatches,best_donor2_prob); | |
2251 | if (sufficient1p && sufficient2p && sufficient3p && sufficient4p) { | |
2252 | *nhits += 1; | |
2253 | hits = List_push(hits,(void *) Stage3end_new_shortexon(&(*found_score),donor,acceptor,shortexon, | |
2254 | best_donor2_prob,/*shortexonA_prob*/best_acceptor2_prob, | |
2255 | /*shortexonD_prob*/best_donor1_prob,best_acceptor1_prob, | |
2256 | /*amb_length_donor*/0,/*amb_length_acceptor*/0, | |
2257 | /*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL, | |
2258 | /*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL, | |
2259 | /*amb_nmismatches_donor*/NULL,/*amb_nmismatches_acceptor*/NULL, | |
2260 | /*amb_probs_donor*/NULL,/*amb_probs_acceptor*/NULL, | |
2261 | /*copy_donor_p*/false,/*copy_acceptor_p*/false,/*copy_shortexon_p*/false, | |
2262 | splicing_penalty,querylength,sensedir,sarrayp)); | |
2263 | } else if (subs_or_indels_p == true) { | |
2264 | /* Don't alter hits */ | |
2265 | if (donor != NULL) Substring_free(&donor); | |
2266 | if (shortexon != NULL) Substring_free(&shortexon); | |
2267 | if (acceptor != NULL) Substring_free(&acceptor); | |
2268 | } else if (donor_support < LOWPROB_SUPPORT || acceptor_support < LOWPROB_SUPPORT) { | |
2269 | if (donor != NULL) Substring_free(&donor); | |
2270 | if (shortexon != NULL) Substring_free(&shortexon); | |
2271 | if (acceptor != NULL) Substring_free(&acceptor); | |
2272 | } else if ((sufficient1p || sufficient2p) && (sufficient3p || sufficient4p)) { | |
2273 | *lowprob = List_push(*lowprob, | |
2274 | (void *) Stage3end_new_shortexon(&(*found_score),donor,acceptor,shortexon, | |
2275 | best_donor2_prob,/*shortexonA_prob*/best_acceptor2_prob, | |
2276 | /*shortexonD_prob*/best_donor1_prob,best_acceptor1_prob, | |
2277 | /*amb_length_donor*/0,/*amb_length_acceptor*/0, | |
2278 | /*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL, | |
2279 | /*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL, | |
2280 | /*amb_nmismatches_donor*/NULL,/*amb_nmismatches_acceptor*/NULL, | |
2281 | /*amb_probs_donor*/NULL,/*amb_probs_acceptor*/NULL, | |
2282 | /*copy_donor_p*/false,/*copy_acceptor_p*/false,/*copy_shortexon_p*/false, | |
2283 | splicing_penalty,querylength,sensedir,sarrayp)); | |
2284 | } else { | |
2285 | if (donor != NULL) Substring_free(&donor); | |
2286 | if (shortexon != NULL) Substring_free(&shortexon); | |
2287 | if (acceptor != NULL) Substring_free(&acceptor); | |
2288 | } | |
2289 | } | |
2290 | } | |
2291 | } | |
2292 | } | |
2293 | ||
2294 | return hits; | |
2295 | } | |
2296 | #endif | |
2297 | 1788 | |
2298 | 1789 | |
2299 | 1790 | static int |
2377 | 1868 | Substring_genomicstart(Stage3end_substring_donor(hit)),Substring_genomicend(Stage3end_substring_donor(hit)), |
2378 | 1869 | Substring_match_length_orig(Stage3end_substring_acceptor(hit)), |
2379 | 1870 | Substring_genomicstart(Stage3end_substring_acceptor(hit)),Substring_genomicend(Stage3end_substring_acceptor(hit)), |
2380 | Stage3end_nmismatches_whole(hit),Substring_chimera_prob(Stage3end_substring_donor(hit)), | |
2381 | Substring_chimera_prob(Stage3end_substring_acceptor(hit)))); | |
1871 | Stage3end_nmismatches_whole(hit),Substring_siteD_prob(Stage3end_substring_donor(hit)), | |
1872 | Substring_siteA_prob(Stage3end_substring_acceptor(hit)))); | |
2382 | 1873 | if ((nmismatches = Stage3end_nmismatches_whole(hit)) < best_nmismatches) { |
2383 | 1874 | best_nmismatches = nmismatches; |
2384 | 1875 | } |
2394 | 1885 | if (Stage3end_nmismatches_whole(hit) <= best_nmismatches + LOCALSPLICING_NMATCHES_SLOP && |
2395 | 1886 | Stage3end_chimera_prob(hit) >= best_prob - LOCALSPLICING_PROB_SLOP) { |
2396 | 1887 | debug7(printf("accepting distance %d, probabilities %f and %f\n", |
2397 | Stage3end_distance(hit),Substring_chimera_prob(Stage3end_substring_donor(hit)), | |
2398 | Substring_chimera_prob(Stage3end_substring_acceptor(hit)))); | |
1888 | Stage3end_distance(hit),Substring_siteD_prob(Stage3end_substring_donor(hit)), | |
1889 | Substring_siteA_prob(Stage3end_substring_acceptor(hit)))); | |
2399 | 1890 | n_good_spliceends += 1; |
2400 | 1891 | accepted_hits = List_push(accepted_hits,(void *) hit); |
2401 | 1892 | } else { |
2411 | 1902 | if (Stage3end_nmismatches_whole(hit) <= best_nmismatches + LOCALSPLICING_NMATCHES_SLOP || |
2412 | 1903 | Stage3end_chimera_prob(hit) >= best_prob - LOCALSPLICING_PROB_SLOP) { |
2413 | 1904 | debug7(printf("accepting distance %d, probabilities %f and %f\n", |
2414 | Stage3end_distance(hit),Substring_chimera_prob(Stage3end_substring_donor(hit)), | |
2415 | Substring_chimera_prob(Stage3end_substring_acceptor(hit)))); | |
1905 | Stage3end_distance(hit),Substring_siteD_prob(Stage3end_substring_donor(hit)), | |
1906 | Substring_siteA_prob(Stage3end_substring_acceptor(hit)))); | |
2416 | 1907 | n_good_spliceends += 1; |
2417 | 1908 | accepted_hits = List_push(accepted_hits,(void *) hit); |
2418 | 1909 | } else { |
2482 | 1973 | for (kk = ii; kk < jj; kk++) { |
2483 | 1974 | acceptor = Stage3end_substring_acceptor(subarray[kk]); |
2484 | 1975 | #ifdef LARGE_GENOMES |
2485 | ambcoords = Uint8list_push(ambcoords,Substring_splicecoord(acceptor)); | |
1976 | ambcoords = Uint8list_push(ambcoords,Substring_splicecoord_A(acceptor)); | |
2486 | 1977 | #else |
2487 | ambcoords = Uintlist_push(ambcoords,Substring_splicecoord(acceptor)); | |
1978 | ambcoords = Uintlist_push(ambcoords,Substring_splicecoord_A(acceptor)); | |
2488 | 1979 | #endif |
2489 | 1980 | amb_knowni = Intlist_push(amb_knowni,-1); |
2490 | 1981 | amb_nmismatches = Intlist_push(amb_nmismatches,Substring_nmismatches_whole(acceptor)); |
2491 | amb_probs = Doublelist_push(amb_probs,Substring_chimera_prob(acceptor)); | |
1982 | amb_probs = Doublelist_push(amb_probs,Substring_siteA_prob(acceptor)); | |
2492 | 1983 | } |
2493 | 1984 | |
2494 | 1985 | nmismatches_acceptor = best_nmismatches - Substring_nmismatches_whole(donor); |
2550 | 2041 | for (kk = ii; kk < jj; kk++) { |
2551 | 2042 | donor = Stage3end_substring_donor(subarray[kk]); |
2552 | 2043 | #ifdef LARGE_GENOMES |
2553 | ambcoords = Uint8list_push(ambcoords,Substring_splicecoord(donor)); | |
2044 | ambcoords = Uint8list_push(ambcoords,Substring_splicecoord_D(donor)); | |
2554 | 2045 | #else |
2555 | ambcoords = Uintlist_push(ambcoords,Substring_splicecoord(donor)); | |
2046 | ambcoords = Uintlist_push(ambcoords,Substring_splicecoord_D(donor)); | |
2556 | 2047 | #endif |
2557 | 2048 | amb_knowni = Intlist_push(amb_knowni,-1); |
2558 | 2049 | amb_nmismatches = Intlist_push(amb_nmismatches,Substring_nmismatches_whole(donor)); |
2559 | amb_probs = Doublelist_push(amb_probs,Substring_chimera_prob(donor)); | |
2050 | amb_probs = Doublelist_push(amb_probs,Substring_siteD_prob(donor)); | |
2560 | 2051 | } |
2561 | 2052 | |
2562 | 2053 | nmismatches_donor = best_nmismatches - Substring_nmismatches_whole(acceptor); |
2705 | 2196 | Substring_genomicstart(Stage3end_substring_donor(hit)),Substring_genomicend(Stage3end_substring_donor(hit)), |
2706 | 2197 | Substring_match_length_orig(Stage3end_substring_acceptor(hit)), |
2707 | 2198 | Substring_genomicstart(Stage3end_substring_acceptor(hit)),Substring_genomicend(Stage3end_substring_acceptor(hit)), |
2708 | Stage3end_nmismatches_whole(hit),Substring_chimera_prob(Stage3end_substring_donor(hit)), | |
2709 | Substring_chimera_prob(Stage3end_substring_acceptor(hit)))); | |
2199 | Stage3end_nmismatches_whole(hit),Substring_siteD_prob(Stage3end_substring_donor(hit)), | |
2200 | Substring_siteA_prob(Stage3end_substring_acceptor(hit)))); | |
2710 | 2201 | if ((nmismatches = Stage3end_nmismatches_whole(hit)) < best_nmismatches) { |
2711 | 2202 | best_nmismatches = nmismatches; |
2712 | 2203 | } |
2722 | 2213 | if (Stage3end_nmismatches_whole(hit) <= best_nmismatches + LOCALSPLICING_NMATCHES_SLOP && |
2723 | 2214 | Stage3end_chimera_prob(hit) >= best_prob - LOCALSPLICING_PROB_SLOP) { |
2724 | 2215 | debug7(printf("accepting distance %d, probabilities %f and %f\n", |
2725 | Stage3end_distance(hit),Substring_chimera_prob(Stage3end_substring_donor(hit)), | |
2726 | Substring_chimera_prob(Stage3end_substring_acceptor(hit)))); | |
2216 | Stage3end_distance(hit),Substring_siteD_prob(Stage3end_substring_donor(hit)), | |
2217 | Substring_siteA_prob(Stage3end_substring_acceptor(hit)))); | |
2727 | 2218 | n_good_spliceends += 1; |
2728 | 2219 | accepted_hits = List_push(accepted_hits,(void *) hit); |
2729 | 2220 | } else { |
2739 | 2230 | if (Stage3end_nmismatches_whole(hit) <= best_nmismatches + LOCALSPLICING_NMATCHES_SLOP || |
2740 | 2231 | Stage3end_chimera_prob(hit) >= best_prob - LOCALSPLICING_PROB_SLOP) { |
2741 | 2232 | debug7(printf("accepting distance %d, probabilities %f and %f\n", |
2742 | Stage3end_distance(hit),Substring_chimera_prob(Stage3end_substring_donor(hit)), | |
2743 | Substring_chimera_prob(Stage3end_substring_acceptor(hit)))); | |
2233 | Stage3end_distance(hit),Substring_siteD_prob(Stage3end_substring_donor(hit)), | |
2234 | Substring_siteA_prob(Stage3end_substring_acceptor(hit)))); | |
2744 | 2235 | n_good_spliceends += 1; |
2745 | 2236 | accepted_hits = List_push(accepted_hits,(void *) hit); |
2746 | 2237 | } else { |
2813 | 2304 | for (kk = ii; kk < jj; kk++) { |
2814 | 2305 | acceptor = Stage3end_substring_acceptor(subarray[kk]); |
2815 | 2306 | #ifdef LARGE_GENOMES |
2816 | ambcoords = Uint8list_push(ambcoords,Substring_splicecoord(acceptor)); | |
2307 | ambcoords = Uint8list_push(ambcoords,Substring_splicecoord_A(acceptor)); | |
2817 | 2308 | #else |
2818 | ambcoords = Uintlist_push(ambcoords,Substring_splicecoord(acceptor)); | |
2309 | ambcoords = Uintlist_push(ambcoords,Substring_splicecoord_A(acceptor)); | |
2819 | 2310 | #endif |
2820 | 2311 | amb_knowni = Intlist_push(amb_knowni,-1); |
2821 | 2312 | amb_nmismatches = Intlist_push(amb_nmismatches,Substring_nmismatches_whole(acceptor)); |
2822 | amb_probs = Doublelist_push(amb_probs,Substring_chimera_prob(acceptor)); | |
2313 | amb_probs = Doublelist_push(amb_probs,Substring_siteA_prob(acceptor)); | |
2823 | 2314 | } |
2824 | 2315 | |
2825 | 2316 | nmismatches_acceptor = best_nmismatches - Substring_nmismatches_whole(donor); |
2881 | 2372 | for (kk = ii; kk < jj; kk++) { |
2882 | 2373 | donor = Stage3end_substring_donor(subarray[kk]); |
2883 | 2374 | #ifdef LARGE_GENOMES |
2884 | ambcoords = Uint8list_push(ambcoords,Substring_splicecoord(donor)); | |
2375 | ambcoords = Uint8list_push(ambcoords,Substring_splicecoord_D(donor)); | |
2885 | 2376 | #else |
2886 | ambcoords = Uintlist_push(ambcoords,Substring_splicecoord(donor)); | |
2377 | ambcoords = Uintlist_push(ambcoords,Substring_splicecoord_D(donor)); | |
2887 | 2378 | #endif |
2888 | 2379 | amb_knowni = Intlist_push(amb_knowni,-1); |
2889 | 2380 | amb_nmismatches = Intlist_push(amb_nmismatches,Substring_nmismatches_whole(donor)); |
2890 | amb_probs = Doublelist_push(amb_probs,Substring_chimera_prob(donor)); | |
2381 | amb_probs = Doublelist_push(amb_probs,Substring_siteD_prob(donor)); | |
2891 | 2382 | } |
2892 | 2383 | |
2893 | 2384 | nmismatches_donor = best_nmismatches - Substring_nmismatches_whole(acceptor); |
0 | static char rcsid[] = "$Id: stage1hr.c 195972 2016-08-08 17:11:50Z twu $"; | |
0 | static char rcsid[] = "$Id: stage1hr.c 196433 2016-08-16 20:20:51Z twu $"; | |
1 | 1 | #ifdef HAVE_CONFIG_H |
2 | 2 | #include <config.h> |
3 | 3 | #endif |
88 | 88 | #define MAX_INDEXSIZE 8 |
89 | 89 | #endif |
90 | 90 | |
91 | /* Note: MAX_READLENGTH is defined externally by configure */ | |
92 | #ifndef MAX_READLENGTH | |
93 | #error A default value for MAX_READLENGTH was not provided to configure | |
94 | #endif | |
95 | ||
96 | 91 | |
97 | 92 | /* MAX_NALIGNMENTS of 2 vs 1 gets 1600 improvements in 275,000 reads */ |
98 | 93 | /* MAX_NALIGNMENTS of 3 vs 2 gets 96 improvements in 275,000 reads */ |
163 | 158 | static int max_gmap_pairsearch; |
164 | 159 | static int max_gmap_segments; /* Not used */ |
165 | 160 | static int max_gmap_improvement; |
161 | ||
162 | static int max_floors_readlength; | |
166 | 163 | |
167 | 164 | |
168 | 165 | #define A_CHAR 0x0 |
4560 | 4557 | ptr->leftmost = ptr->rightmost = -1; |
4561 | 4558 | ptr->left_splice_p = ptr->right_splice_p = false; |
4562 | 4559 | ptr->spliceable_low_p = last_spliceable_p; |
4560 | /* ptr->spliceable_high_p = false; */ | |
4563 | 4561 | #if 0 |
4564 | 4562 | ptr->leftspan = ptr->rightspan = -1; |
4565 | 4563 | #endif |
4573 | 4571 | so if segmenti->querypos3 is too high, then it is not spliceable */ |
4574 | 4572 | if (last_querypos > query_lastpos) { |
4575 | 4573 | /* Not spliceable */ |
4574 | last_spliceable_p = false; | |
4576 | 4575 | } else if (diagonal <= last_diagonal + max_distance) { |
4577 | 4576 | *ptr_spliceable++ = ptr; |
4578 | 4577 | ptr->spliceable_high_p = last_spliceable_p = true; |
4582 | 4581 | so if segmenti->querypos5 is too low, then it is not spliceable */ |
4583 | 4582 | if (first_querypos < index1part) { |
4584 | 4583 | /* Not spliceable */ |
4584 | last_spliceable_p = false; | |
4585 | 4585 | } else if (diagonal <= last_diagonal + max_distance) { |
4586 | 4586 | *ptr_spliceable++ = ptr; |
4587 | 4587 | ptr->spliceable_high_p = last_spliceable_p = true; |
4844 | 4844 | ptr->leftmost = ptr->rightmost = -1; |
4845 | 4845 | ptr->left_splice_p = ptr->right_splice_p = false; |
4846 | 4846 | ptr->spliceable_low_p = last_spliceable_p; |
4847 | ptr->spliceable_high_p = false; | |
4847 | 4848 | #if 0 |
4848 | 4849 | ptr->leftspan = ptr->rightspan = -1; |
4849 | 4850 | #endif |
6448 | 6449 | int sum, best_sum = querylength; |
6449 | 6450 | int conti, shifti; |
6450 | 6451 | int best_indel_pos = -1, endlength; |
6451 | ||
6452 | #ifdef HAVE_ALLOCA | |
6453 | int *mismatch_positions_shift = (int *) ALLOCA((querylength+1)*sizeof(int)); | |
6454 | #else | |
6455 | int mismatch_positions_shift[MAX_READLENGTH+1]; | |
6456 | #endif | |
6457 | ||
6458 | 6452 | #ifdef OLD_END_INDELS |
6459 | 6453 | int indel_pos; |
6460 | 6454 | #else |
6461 | 6455 | int indel_pos_cont, indel_pos_shift; |
6462 | 6456 | #endif |
6457 | int *mismatch_positions_shift; | |
6458 | ||
6459 | ||
6460 | #ifdef HAVE_ALLOCA | |
6461 | if (querylength <= MAX_STACK_READLENGTH) { | |
6462 | mismatch_positions_shift = (int *) ALLOCA((querylength+1)*sizeof(int)); | |
6463 | } else { | |
6464 | mismatch_positions_shift = (int *) MALLOC((querylength+1)*sizeof(int)); | |
6465 | } | |
6466 | #else | |
6467 | mismatch_positions_shift = (int *) MALLOC((querylength+1)*sizeof(int)); | |
6468 | #endif | |
6469 | ||
6463 | 6470 | |
6464 | 6471 | debug2e(printf("Entered compute_end_indels_right with breakpoint = %d, max_mismatches_short %d\n", |
6465 | 6472 | breakpoint,max_mismatches_short)); |
6612 | 6619 | } |
6613 | 6620 | } |
6614 | 6621 | } |
6615 | shifti--; | |
6616 | indel_pos_shift = mismatch_positions_shift[shifti] + 1; | |
6622 | if (--shifti >= 0) { | |
6623 | indel_pos_shift = mismatch_positions_shift[shifti] + 1; | |
6624 | } | |
6617 | 6625 | |
6618 | 6626 | } else { |
6619 | 6627 | sum = shifti + conti; |
6635 | 6643 | } |
6636 | 6644 | } |
6637 | 6645 | conti++; |
6638 | shifti--; | |
6639 | indel_pos_cont = mismatch_positions_long[conti]; | |
6640 | indel_pos_shift = mismatch_positions_shift[shifti] + 1; | |
6646 | if (--shifti >= 0) { | |
6647 | indel_pos_cont = mismatch_positions_long[conti]; | |
6648 | indel_pos_shift = mismatch_positions_shift[shifti] + 1; | |
6649 | } | |
6641 | 6650 | } |
6642 | 6651 | } |
6643 | 6652 | |
6816 | 6825 | } |
6817 | 6826 | } |
6818 | 6827 | } |
6819 | shifti--; | |
6820 | indel_pos_shift = mismatch_positions_shift[shifti] - sep + 1; | |
6828 | if (--shifti >= 0) { | |
6829 | indel_pos_shift = mismatch_positions_shift[shifti] - sep + 1; | |
6830 | } | |
6821 | 6831 | |
6822 | 6832 | } else { |
6823 | 6833 | sum = shifti + conti; |
6839 | 6849 | } |
6840 | 6850 | } |
6841 | 6851 | conti++; |
6842 | shifti--; | |
6843 | indel_pos_cont = mismatch_positions_long[conti]; | |
6844 | indel_pos_shift = mismatch_positions_shift[shifti] - sep + 1; | |
6852 | if (--shifti >= 0) { | |
6853 | indel_pos_cont = mismatch_positions_long[conti]; | |
6854 | indel_pos_shift = mismatch_positions_shift[shifti] - sep + 1; | |
6855 | } | |
6845 | 6856 | } |
6846 | 6857 | } |
6847 | 6858 | |
6871 | 6882 | } |
6872 | 6883 | } |
6873 | 6884 | |
6885 | #ifdef HAVE_ALLOCA | |
6886 | if (querylength <= MAX_STACK_READLENGTH) { | |
6887 | FREEA(mismatch_positions_shift); | |
6888 | } else { | |
6889 | FREE(mismatch_positions_shift); | |
6890 | } | |
6891 | #else | |
6892 | FREE(mismatch_positions_shift); | |
6893 | #endif | |
6894 | ||
6874 | 6895 | debug2e(printf("compute_end_indels_right returning with nmismatches_longcont %d + nmismatches_shift %d for %d indels at indel_pos %d\n", |
6875 | 6896 | *nmismatches_longcont,*nmismatches_shift,*indels,best_indel_pos)); |
6876 | 6897 | |
6895 | 6916 | int sum, best_sum = querylength; |
6896 | 6917 | int conti, shifti; |
6897 | 6918 | int best_indel_pos = -1; |
6898 | ||
6899 | #ifdef HAVE_ALLOCA | |
6900 | int *mismatch_positions_shift = (int *) ALLOCA((querylength+1)*sizeof(int)); | |
6901 | #else | |
6902 | int mismatch_positions_shift[MAX_READLENGTH+1]; | |
6903 | #endif | |
6904 | ||
6905 | 6919 | #ifdef OLD_END_INDELS |
6906 | 6920 | int indel_pos; |
6907 | 6921 | #else |
6908 | 6922 | int indel_pos_cont, indel_pos_shift; |
6923 | #endif | |
6924 | int *mismatch_positions_shift; | |
6925 | ||
6926 | #ifdef HAVE_ALLOCA | |
6927 | if (querylength <= MAX_STACK_READLENGTH) { | |
6928 | mismatch_positions_shift = (int *) ALLOCA((querylength+1)*sizeof(int)); | |
6929 | } else { | |
6930 | mismatch_positions_shift = (int *) MALLOC((querylength+1)*sizeof(int)); | |
6931 | } | |
6932 | #else | |
6933 | mismatch_positions_shift = (int *) MALLOC((querylength+1)*sizeof(int)); | |
6909 | 6934 | #endif |
6910 | 6935 | |
6911 | 6936 | |
7058 | 7083 | } |
7059 | 7084 | } |
7060 | 7085 | } |
7061 | shifti--; | |
7062 | indel_pos_shift = mismatch_positions_shift[shifti]; | |
7086 | if (--shifti >= 0) { | |
7087 | indel_pos_shift = mismatch_positions_shift[shifti]; | |
7088 | } | |
7063 | 7089 | |
7064 | 7090 | } else { |
7065 | 7091 | sum = shifti + conti; |
7080 | 7106 | } |
7081 | 7107 | } |
7082 | 7108 | conti++; |
7083 | shifti--; | |
7084 | indel_pos_cont = mismatch_positions_long[conti] - sep + 1; | |
7085 | indel_pos_shift = mismatch_positions_shift[shifti]; | |
7086 | ||
7109 | if (--shifti >= 0) { | |
7110 | indel_pos_cont = mismatch_positions_long[conti] - sep + 1; | |
7111 | indel_pos_shift = mismatch_positions_shift[shifti]; | |
7112 | } | |
7087 | 7113 | } |
7088 | 7114 | } |
7089 | 7115 | |
7259 | 7285 | } |
7260 | 7286 | } |
7261 | 7287 | } |
7262 | shifti--; | |
7263 | indel_pos_shift = mismatch_positions_shift[shifti]; | |
7288 | if (--shifti >= 0) { | |
7289 | indel_pos_shift = mismatch_positions_shift[shifti]; | |
7290 | } | |
7264 | 7291 | |
7265 | 7292 | } else { |
7266 | 7293 | sum = shifti + conti; |
7281 | 7308 | } |
7282 | 7309 | } |
7283 | 7310 | conti++; |
7284 | shifti--; | |
7285 | indel_pos_cont = mismatch_positions_long[conti] + 1; | |
7286 | indel_pos_shift = mismatch_positions_shift[shifti]; | |
7311 | if (--shifti >= 0) { | |
7312 | indel_pos_cont = mismatch_positions_long[conti] + 1; | |
7313 | indel_pos_shift = mismatch_positions_shift[shifti]; | |
7314 | } | |
7287 | 7315 | } |
7288 | 7316 | } |
7289 | 7317 | |
7312 | 7340 | } |
7313 | 7341 | } |
7314 | 7342 | |
7343 | #ifdef HAVE_ALLOCA | |
7344 | if (querylength <= MAX_STACK_READLENGTH) { | |
7345 | FREEA(mismatch_positions_shift); | |
7346 | } else { | |
7347 | FREE(mismatch_positions_shift); | |
7348 | } | |
7349 | #else | |
7350 | FREE(mismatch_positions_shift); | |
7351 | #endif | |
7315 | 7352 | |
7316 | 7353 | debug2e(printf("compute_end_indels_left returning with nmismatches_cont %d + nmismatches_shift %d for %d indels at indel_pos %d\n", |
7317 | 7354 | *nmismatches_longcont,*nmismatches_shift,*indels,best_indel_pos)); |
7342 | 7379 | int indels, query_indel_pos, indel_pos, breakpoint; |
7343 | 7380 | int nmismatches, nmismatches_long, nmismatches_longcont, nmismatches_shift; |
7344 | 7381 | int nmismatches1, nmismatches2; |
7382 | int *mismatch_positions; | |
7383 | ||
7345 | 7384 | |
7346 | 7385 | #ifdef HAVE_ALLOCA |
7347 | int *mismatch_positions = (int *) ALLOCA(querylength*sizeof(int)); | |
7348 | #else | |
7349 | int mismatch_positions[MAX_READLENGTH]; | |
7386 | if (querylength <= MAX_STACK_READLENGTH) { | |
7387 | mismatch_positions = (int *) ALLOCA((querylength+1)*sizeof(int)); | |
7388 | } else { | |
7389 | mismatch_positions = (int *) MALLOC((querylength+1)*sizeof(int)); | |
7390 | } | |
7391 | #else | |
7392 | mismatch_positions = (int *) MALLOC((querylength+1)*sizeof(int)); | |
7350 | 7393 | #endif |
7351 | 7394 | |
7352 | 7395 | |
7484 | 7527 | } |
7485 | 7528 | } |
7486 | 7529 | |
7530 | #ifdef HAVE_ALLOCA | |
7531 | if (querylength <= MAX_STACK_READLENGTH) { | |
7532 | FREEA(mismatch_positions); | |
7533 | } else { | |
7534 | FREE(mismatch_positions); | |
7535 | } | |
7536 | #else | |
7537 | FREE(mismatch_positions); | |
7538 | #endif | |
7539 | ||
7487 | 7540 | return hits; |
7488 | 7541 | } |
7489 | 7542 | |
7508 | 7561 | int indels, query_indel_pos, indel_pos, breakpoint; |
7509 | 7562 | int nmismatches, nmismatches_long, nmismatches_longcont, nmismatches_shift; |
7510 | 7563 | int nmismatches1, nmismatches2; |
7564 | int *mismatch_positions; | |
7565 | ||
7511 | 7566 | |
7512 | 7567 | #ifdef HAVE_ALLOCA |
7513 | int *mismatch_positions = (int *) ALLOCA(querylength*sizeof(int)); | |
7514 | #else | |
7515 | int mismatch_positions[MAX_READLENGTH]; | |
7568 | if (querylength <= MAX_STACK_READLENGTH) { | |
7569 | mismatch_positions = (int *) ALLOCA((querylength+1)*sizeof(int)); | |
7570 | } else { | |
7571 | mismatch_positions = (int *) MALLOC((querylength+1)*sizeof(int)); | |
7572 | } | |
7573 | #else | |
7574 | mismatch_positions = (int *) MALLOC((querylength+1)*sizeof(int)); | |
7516 | 7575 | #endif |
7517 | 7576 | |
7518 | 7577 | |
7649 | 7708 | } |
7650 | 7709 | } |
7651 | 7710 | } |
7711 | ||
7712 | #ifdef HAVE_ALLOCA | |
7713 | if (querylength <= MAX_STACK_READLENGTH) { | |
7714 | FREEA(mismatch_positions); | |
7715 | } else { | |
7716 | FREE(mismatch_positions); | |
7717 | } | |
7718 | #else | |
7719 | FREE(mismatch_positions); | |
7720 | #endif | |
7652 | 7721 | |
7653 | 7722 | return hits; |
7654 | 7723 | } |
7792 | 7861 | |
7793 | 7862 | |
7794 | 7863 | |
7795 | #if 0 | |
7796 | static void | |
7797 | find_segmentm_span (Segment_T segmentm, int max_mismatches_allowed, | |
7798 | int querylength, Compress_T query_compress, | |
7799 | Univcoord_T left, bool plusp, int genestrand, bool first_read_p) { | |
7800 | int nmismatches, i; | |
7801 | int leftspan, rightspan, bestspan; | |
7802 | #ifdef HAVE_ALLOCA | |
7803 | int *mismatch_positions = (int *) ALLOCA(querylength*sizeof(int)); | |
7804 | #else | |
7805 | int mismatch_positions[MAX_READLENGTH]; | |
7806 | #endif | |
7807 | ||
7808 | /* Find all mismatches */ | |
7809 | nmismatches = Genome_mismatches_left(mismatch_positions,/*max_mismatches*/querylength, | |
7810 | query_compress,left,/*pos5*/0,/*pos3*/querylength, | |
7811 | plusp,genestrand,first_read_p); | |
7812 | ||
7813 | if (nmismatches < max_mismatches_allowed) { | |
7814 | segmentm->leftspan = 0; | |
7815 | segmentm->rightspan = querylength; | |
7816 | } else { | |
7817 | segmentm->leftspan = 0; | |
7818 | bestspan = segmentm->rightspan = mismatch_positions[max_mismatches_allowed] + /*slop*/ 1; | |
7819 | for (i = 0; i < nmismatches - max_mismatches_allowed; i++) { | |
7820 | leftspan = mismatch_positions[i]; | |
7821 | rightspan = mismatch_positions[i + max_mismatches_allowed + 1] + /*slop*/ 1; | |
7822 | if (rightspan - leftspan > bestspan) { | |
7823 | segmentm->leftspan = leftspan; | |
7824 | segmentm->rightspan = rightspan; | |
7825 | bestspan = rightspan - leftspan; | |
7826 | } else if (rightspan - leftspan == bestspan) { | |
7827 | segmentm->rightspan = rightspan; | |
7828 | } | |
7829 | } | |
7830 | } | |
7831 | return; | |
7832 | } | |
7833 | #endif | |
7834 | ||
7835 | ||
7836 | 7864 | /* Copied from sarray-read.c */ |
7837 | 7865 | static int |
7838 | 7866 | donor_match_length_cmp (const void *a, const void *b) { |
7883 | 7911 | int nmismatches_left, nmismatches_right; |
7884 | 7912 | int segmenti_donor_nknown, segmentj_acceptor_nknown, |
7885 | 7913 | segmentj_antidonor_nknown, segmenti_antiacceptor_nknown; |
7914 | int *mismatch_positions_left, *mismatch_positions_right; | |
7915 | int *segmenti_donor_knownpos, *segmentj_acceptor_knownpos, *segmentj_antidonor_knownpos, *segmenti_antiacceptor_knownpos, | |
7916 | *segmenti_donor_knowni, *segmentj_acceptor_knowni, | |
7917 | *segmentj_antidonor_knowni, *segmenti_antiacceptor_knowni; | |
7886 | 7918 | |
7887 | 7919 | #ifdef HAVE_ALLOCA |
7888 | int *mismatch_positions_left = (int *) ALLOCA(querylength*sizeof(int)); | |
7889 | int *mismatch_positions_right = (int *) ALLOCA(querylength*sizeof(int)); | |
7890 | int *segmenti_donor_knownpos = (int *) ALLOCA((querylength+1)*sizeof(int)); | |
7891 | int *segmentj_acceptor_knownpos = (int *) ALLOCA((querylength+1)*sizeof(int)); | |
7892 | int *segmentj_antidonor_knownpos = (int *) ALLOCA((querylength+1)*sizeof(int)); | |
7893 | int *segmenti_antiacceptor_knownpos = (int *) ALLOCA((querylength+1)*sizeof(int)); | |
7894 | int *segmenti_donor_knowni = (int *) ALLOCA((querylength+1)*sizeof(int)); | |
7895 | int *segmentj_acceptor_knowni = (int *) ALLOCA((querylength+1)*sizeof(int)); | |
7896 | int *segmentj_antidonor_knowni = (int *) ALLOCA((querylength+1)*sizeof(int)); | |
7897 | int *segmenti_antiacceptor_knowni = (int *) ALLOCA((querylength+1)*sizeof(int)); | |
7898 | #else | |
7899 | int mismatch_positions_left[MAX_READLENGTH], mismatch_positions_right[MAX_READLENGTH]; | |
7900 | int segmenti_donor_knownpos[MAX_READLENGTH+1], segmentj_acceptor_knownpos[MAX_READLENGTH+1], | |
7901 | segmentj_antidonor_knownpos[MAX_READLENGTH+1], segmenti_antiacceptor_knownpos[MAX_READLENGTH+1]; | |
7902 | int segmenti_donor_knowni[MAX_READLENGTH+1], segmentj_acceptor_knowni[MAX_READLENGTH+1], | |
7903 | segmentj_antidonor_knowni[MAX_READLENGTH+1], segmenti_antiacceptor_knowni[MAX_READLENGTH+1]; | |
7920 | if (querylength <= MAX_STACK_READLENGTH) { | |
7921 | mismatch_positions_left = (int *) ALLOCA(querylength*sizeof(int)); | |
7922 | mismatch_positions_right = (int *) ALLOCA(querylength*sizeof(int)); | |
7923 | segmenti_donor_knownpos = (int *) ALLOCA((querylength+1)*sizeof(int)); | |
7924 | segmentj_acceptor_knownpos = (int *) ALLOCA((querylength+1)*sizeof(int)); | |
7925 | segmentj_antidonor_knownpos = (int *) ALLOCA((querylength+1)*sizeof(int)); | |
7926 | segmenti_antiacceptor_knownpos = (int *) ALLOCA((querylength+1)*sizeof(int)); | |
7927 | segmenti_donor_knowni = (int *) ALLOCA((querylength+1)*sizeof(int)); | |
7928 | segmentj_acceptor_knowni = (int *) ALLOCA((querylength+1)*sizeof(int)); | |
7929 | segmentj_antidonor_knowni = (int *) ALLOCA((querylength+1)*sizeof(int)); | |
7930 | segmenti_antiacceptor_knowni = (int *) ALLOCA((querylength+1)*sizeof(int)); | |
7931 | } else { | |
7932 | mismatch_positions_left = (int *) MALLOC(querylength*sizeof(int)); | |
7933 | mismatch_positions_right = (int *) MALLOC(querylength*sizeof(int)); | |
7934 | segmenti_donor_knownpos = (int *) MALLOC((querylength+1)*sizeof(int)); | |
7935 | segmentj_acceptor_knownpos = (int *) MALLOC((querylength+1)*sizeof(int)); | |
7936 | segmentj_antidonor_knownpos = (int *) MALLOC((querylength+1)*sizeof(int)); | |
7937 | segmenti_antiacceptor_knownpos = (int *) MALLOC((querylength+1)*sizeof(int)); | |
7938 | segmenti_donor_knowni = (int *) MALLOC((querylength+1)*sizeof(int)); | |
7939 | segmentj_acceptor_knowni = (int *) MALLOC((querylength+1)*sizeof(int)); | |
7940 | segmentj_antidonor_knowni = (int *) MALLOC((querylength+1)*sizeof(int)); | |
7941 | segmenti_antiacceptor_knowni = (int *) MALLOC((querylength+1)*sizeof(int)); | |
7942 | } | |
7943 | #else | |
7944 | mismatch_positions_left = (int *) MALLOC(querylength*sizeof(int)); | |
7945 | mismatch_positions_right = (int *) MALLOC(querylength*sizeof(int)); | |
7946 | segmenti_donor_knownpos = (int *) MALLOC((querylength+1)*sizeof(int)); | |
7947 | segmentj_acceptor_knownpos = (int *) MALLOC((querylength+1)*sizeof(int)); | |
7948 | segmentj_antidonor_knownpos = (int *) MALLOC((querylength+1)*sizeof(int)); | |
7949 | segmenti_antiacceptor_knownpos = (int *) MALLOC((querylength+1)*sizeof(int)); | |
7950 | segmenti_donor_knowni = (int *) MALLOC((querylength+1)*sizeof(int)); | |
7951 | segmentj_acceptor_knowni = (int *) MALLOC((querylength+1)*sizeof(int)); | |
7952 | segmentj_antidonor_knowni = (int *) MALLOC((querylength+1)*sizeof(int)); | |
7953 | segmenti_antiacceptor_knowni = (int *) MALLOC((querylength+1)*sizeof(int)); | |
7904 | 7954 | #endif |
7905 | 7955 | |
7906 | 7956 | Chrpos_T max_distance; |
8125 | 8175 | Substring_genomicstart(Stage3end_substring_donor(hit)),Substring_genomicend(Stage3end_substring_donor(hit)), |
8126 | 8176 | Substring_match_length_orig(Stage3end_substring_acceptor(hit)), |
8127 | 8177 | Substring_genomicstart(Stage3end_substring_acceptor(hit)),Substring_genomicend(Stage3end_substring_acceptor(hit)), |
8128 | Stage3end_nmismatches_whole(hit),Substring_chimera_prob(Stage3end_substring_donor(hit)), | |
8129 | Substring_chimera_prob(Stage3end_substring_acceptor(hit)))); | |
8178 | Stage3end_nmismatches_whole(hit),Substring_siteD_prob(Stage3end_substring_donor(hit)), | |
8179 | Substring_siteA_prob(Stage3end_substring_acceptor(hit)))); | |
8130 | 8180 | if ((nmismatches = Stage3end_nmismatches_whole(hit)) < best_nmismatches) { |
8131 | 8181 | best_nmismatches = nmismatches; |
8132 | 8182 | } |
8142 | 8192 | if (Stage3end_nmismatches_whole(hit) <= best_nmismatches + LOCALSPLICING_NMATCHES_SLOP && |
8143 | 8193 | Stage3end_chimera_prob(hit) >= best_prob - LOCALSPLICING_PROB_SLOP) { |
8144 | 8194 | debug7(printf("accepting distance %d, probabilities %f and %f\n", |
8145 | Stage3end_distance(hit),Substring_chimera_prob(Stage3end_substring_donor(hit)), | |
8146 | Substring_chimera_prob(Stage3end_substring_acceptor(hit)))); | |
8195 | Stage3end_distance(hit),Substring_siteD_prob(Stage3end_substring_donor(hit)), | |
8196 | Substring_siteA_prob(Stage3end_substring_acceptor(hit)))); | |
8147 | 8197 | n_good_spliceends += 1; |
8148 | 8198 | accepted_hits = List_push(accepted_hits,(void *) hit); |
8149 | 8199 | } else { |
8159 | 8209 | if (Stage3end_nmismatches_whole(hit) <= best_nmismatches + LOCALSPLICING_NMATCHES_SLOP || |
8160 | 8210 | Stage3end_chimera_prob(hit) >= best_prob - LOCALSPLICING_PROB_SLOP) { |
8161 | 8211 | debug7(printf("accepting distance %d, probabilities %f and %f\n", |
8162 | Stage3end_distance(hit),Substring_chimera_prob(Stage3end_substring_donor(hit)), | |
8163 | Substring_chimera_prob(Stage3end_substring_acceptor(hit)))); | |
8212 | Stage3end_distance(hit),Substring_siteD_prob(Stage3end_substring_donor(hit)), | |
8213 | Substring_siteA_prob(Stage3end_substring_acceptor(hit)))); | |
8164 | 8214 | n_good_spliceends += 1; |
8165 | 8215 | accepted_hits = List_push(accepted_hits,(void *) hit); |
8166 | 8216 | } else { |
8227 | 8277 | for (k = i; k < j; k++) { |
8228 | 8278 | acceptor = Stage3end_substring_acceptor(hitarray[k]); |
8229 | 8279 | #ifdef LARGE_GENOMES |
8230 | ambcoords = Uint8list_push(ambcoords,Substring_splicecoord(acceptor)); | |
8231 | #else | |
8232 | ambcoords = Uintlist_push(ambcoords,Substring_splicecoord(acceptor)); | |
8280 | ambcoords = Uint8list_push(ambcoords,Substring_splicecoord_A(acceptor)); | |
8281 | #else | |
8282 | ambcoords = Uintlist_push(ambcoords,Substring_splicecoord_A(acceptor)); | |
8233 | 8283 | #endif |
8234 | 8284 | amb_knowni = Intlist_push(amb_knowni,-1); |
8235 | 8285 | amb_nmismatches = Intlist_push(amb_nmismatches,Substring_nmismatches_whole(acceptor)); |
8236 | amb_probs = Doublelist_push(amb_probs,Substring_chimera_prob(acceptor)); | |
8286 | amb_probs = Doublelist_push(amb_probs,Substring_siteA_prob(acceptor)); | |
8237 | 8287 | } |
8238 | 8288 | |
8239 | 8289 | nmismatches_acceptor = best_nmismatches - Substring_nmismatches_whole(donor); |
8298 | 8348 | for (k = i; k < j; k++) { |
8299 | 8349 | donor = Stage3end_substring_donor(hitarray[k]); |
8300 | 8350 | #ifdef LARGE_GENOMES |
8301 | ambcoords = Uint8list_push(ambcoords,Substring_splicecoord(donor)); | |
8302 | #else | |
8303 | ambcoords = Uintlist_push(ambcoords,Substring_splicecoord(donor)); | |
8351 | ambcoords = Uint8list_push(ambcoords,Substring_splicecoord_D(donor)); | |
8352 | #else | |
8353 | ambcoords = Uintlist_push(ambcoords,Substring_splicecoord_D(donor)); | |
8304 | 8354 | #endif |
8305 | 8355 | amb_knowni = Intlist_push(amb_knowni,-1); |
8306 | 8356 | amb_nmismatches = Intlist_push(amb_nmismatches,Substring_nmismatches_whole(donor)); |
8307 | amb_probs = Doublelist_push(amb_probs,Substring_chimera_prob(donor)); | |
8357 | amb_probs = Doublelist_push(amb_probs,Substring_siteD_prob(donor)); | |
8308 | 8358 | } |
8309 | 8359 | |
8310 | 8360 | nmismatches_donor = best_nmismatches - Substring_nmismatches_whole(acceptor); |
8358 | 8408 | Substring_genomicstart(Stage3end_substring_donor(hit)),Substring_genomicend(Stage3end_substring_donor(hit)), |
8359 | 8409 | Substring_match_length_orig(Stage3end_substring_acceptor(hit)), |
8360 | 8410 | Substring_genomicstart(Stage3end_substring_acceptor(hit)),Substring_genomicend(Stage3end_substring_acceptor(hit)), |
8361 | Stage3end_nmismatches_whole(hit),Substring_chimera_prob(Stage3end_substring_donor(hit)), | |
8362 | Substring_chimera_prob(Stage3end_substring_acceptor(hit)))); | |
8411 | Stage3end_nmismatches_whole(hit),Substring_siteD_prob(Stage3end_substring_donor(hit)), | |
8412 | Substring_siteA_prob(Stage3end_substring_acceptor(hit)))); | |
8363 | 8413 | if ((nmismatches = Stage3end_nmismatches_whole(hit)) < best_nmismatches) { |
8364 | 8414 | best_nmismatches = nmismatches; |
8365 | 8415 | } |
8377 | 8427 | debug7(printf("accepting distance %d, donor length %d and acceptor length %d, probabilities %f and %f\n", |
8378 | 8428 | Stage3end_distance(hit),Substring_match_length_orig(Stage3end_substring_donor(hit)), |
8379 | 8429 | Substring_match_length_orig(Stage3end_substring_acceptor(hit)), |
8380 | Substring_chimera_prob(Stage3end_substring_donor(hit)), | |
8381 | Substring_chimera_prob(Stage3end_substring_acceptor(hit)))); | |
8430 | Substring_siteD_prob(Stage3end_substring_donor(hit)), | |
8431 | Substring_siteA_prob(Stage3end_substring_acceptor(hit)))); | |
8382 | 8432 | n_good_spliceends += 1; |
8383 | 8433 | accepted_hits = List_push(accepted_hits,(void *) hit); |
8384 | 8434 | } else { |
8396 | 8446 | debug7(printf("accepting distance %d, donor length %d and acceptor length %d, probabilities %f and %f\n", |
8397 | 8447 | Stage3end_distance(hit),Substring_match_length_orig(Stage3end_substring_donor(hit)), |
8398 | 8448 | Substring_match_length_orig(Stage3end_substring_acceptor(hit)), |
8399 | Substring_chimera_prob(Stage3end_substring_donor(hit)), | |
8400 | Substring_chimera_prob(Stage3end_substring_acceptor(hit)))); | |
8449 | Substring_siteD_prob(Stage3end_substring_donor(hit)), | |
8450 | Substring_siteA_prob(Stage3end_substring_acceptor(hit)))); | |
8401 | 8451 | n_good_spliceends += 1; |
8402 | 8452 | accepted_hits = List_push(accepted_hits,(void *) hit); |
8403 | 8453 | } else { |
8464 | 8514 | for (k = i; k < j; k++) { |
8465 | 8515 | acceptor = Stage3end_substring_acceptor(hitarray[k]); |
8466 | 8516 | #ifdef LARGE_GENOMES |
8467 | ambcoords = Uint8list_push(ambcoords,Substring_splicecoord(acceptor)); | |
8468 | #else | |
8469 | ambcoords = Uintlist_push(ambcoords,Substring_splicecoord(acceptor)); | |
8517 | ambcoords = Uint8list_push(ambcoords,Substring_splicecoord_A(acceptor)); | |
8518 | #else | |
8519 | ambcoords = Uintlist_push(ambcoords,Substring_splicecoord_A(acceptor)); | |
8470 | 8520 | #endif |
8471 | 8521 | amb_knowni = Intlist_push(amb_knowni,-1); |
8472 | 8522 | amb_nmismatches = Intlist_push(amb_nmismatches,Substring_nmismatches_whole(acceptor)); |
8473 | amb_probs = Doublelist_push(amb_probs,Substring_chimera_prob(acceptor)); | |
8523 | amb_probs = Doublelist_push(amb_probs,Substring_siteA_prob(acceptor)); | |
8474 | 8524 | } |
8475 | 8525 | |
8476 | 8526 | nmismatches_acceptor = best_nmismatches - Substring_nmismatches_whole(donor); |
8535 | 8585 | for (k = i; k < j; k++) { |
8536 | 8586 | donor = Stage3end_substring_donor(hitarray[k]); |
8537 | 8587 | #ifdef LARGE_GENOMES |
8538 | ambcoords = Uint8list_push(ambcoords,Substring_splicecoord(donor)); | |
8539 | #else | |
8540 | ambcoords = Uintlist_push(ambcoords,Substring_splicecoord(donor)); | |
8588 | ambcoords = Uint8list_push(ambcoords,Substring_splicecoord_D(donor)); | |
8589 | #else | |
8590 | ambcoords = Uintlist_push(ambcoords,Substring_splicecoord_D(donor)); | |
8541 | 8591 | #endif |
8542 | 8592 | amb_knowni = Intlist_push(amb_knowni,-1); |
8543 | 8593 | amb_nmismatches = Intlist_push(amb_nmismatches,Substring_nmismatches_whole(donor)); |
8544 | amb_probs = Doublelist_push(amb_probs,Substring_chimera_prob(donor)); | |
8594 | amb_probs = Doublelist_push(amb_probs,Substring_siteD_prob(donor)); | |
8545 | 8595 | } |
8546 | 8596 | |
8547 | 8597 | nmismatches_donor = best_nmismatches - Substring_nmismatches_whole(acceptor); |
8587 | 8637 | } |
8588 | 8638 | } |
8589 | 8639 | |
8640 | #ifdef HAVE_ALLOCA | |
8641 | if (querylength <= MAX_STACK_READLENGTH) { | |
8642 | FREEA(mismatch_positions_left); | |
8643 | FREEA(mismatch_positions_right); | |
8644 | FREEA(segmenti_donor_knownpos); | |
8645 | FREEA(segmentj_acceptor_knownpos); | |
8646 | FREEA(segmentj_antidonor_knownpos); | |
8647 | FREEA(segmenti_antiacceptor_knownpos); | |
8648 | FREEA(segmenti_donor_knowni); | |
8649 | FREEA(segmentj_acceptor_knowni); | |
8650 | FREEA(segmentj_antidonor_knowni); | |
8651 | FREEA(segmenti_antiacceptor_knowni); | |
8652 | } else { | |
8653 | FREE(mismatch_positions_left); | |
8654 | FREE(mismatch_positions_right); | |
8655 | FREE(segmenti_donor_knownpos); | |
8656 | FREE(segmentj_acceptor_knownpos); | |
8657 | FREE(segmentj_antidonor_knownpos); | |
8658 | FREE(segmenti_antiacceptor_knownpos); | |
8659 | FREE(segmenti_donor_knowni); | |
8660 | FREE(segmentj_acceptor_knowni); | |
8661 | FREE(segmentj_antidonor_knowni); | |
8662 | FREE(segmenti_antiacceptor_knowni); | |
8663 | } | |
8664 | #else | |
8665 | FREE(mismatch_positions_left); | |
8666 | FREE(mismatch_positions_right); | |
8667 | FREE(segmenti_donor_knownpos); | |
8668 | FREE(segmentj_acceptor_knownpos); | |
8669 | FREE(segmentj_antidonor_knownpos); | |
8670 | FREE(segmenti_antiacceptor_knownpos); | |
8671 | FREE(segmenti_donor_knowni); | |
8672 | FREE(segmentj_acceptor_knowni); | |
8673 | FREE(segmentj_antidonor_knowni); | |
8674 | FREE(segmenti_antiacceptor_knowni); | |
8675 | #endif | |
8676 | ||
8590 | 8677 | debug(printf("Finished find_singlesplices_plus with %d hits and %d lowprob\n", |
8591 | 8678 | List_length(hits),List_length(*lowprob))); |
8592 | 8679 | |
8606 | 8693 | int nmismatches_left, nmismatches_right; |
8607 | 8694 | int segmenti_donor_nknown, segmentj_acceptor_nknown, |
8608 | 8695 | segmentj_antidonor_nknown, segmenti_antiacceptor_nknown; |
8609 | ||
8696 | int *mismatch_positions_left, *mismatch_positions_right; | |
8697 | int *segmenti_donor_knownpos, *segmentj_acceptor_knownpos, *segmentj_antidonor_knownpos, *segmenti_antiacceptor_knownpos, | |
8698 | *segmenti_donor_knowni, *segmentj_acceptor_knowni, | |
8699 | *segmentj_antidonor_knowni, *segmenti_antiacceptor_knowni; | |
8700 | ||
8610 | 8701 | #ifdef HAVE_ALLOCA |
8611 | int *mismatch_positions_left = (int *) ALLOCA(querylength*sizeof(int)); | |
8612 | int *mismatch_positions_right = (int *) ALLOCA(querylength*sizeof(int)); | |
8613 | int *segmenti_donor_knownpos = (int *) ALLOCA((querylength+1)*sizeof(int)); | |
8614 | int *segmentj_acceptor_knownpos = (int *) ALLOCA((querylength+1)*sizeof(int)); | |
8615 | int *segmentj_antidonor_knownpos = (int *) ALLOCA((querylength+1)*sizeof(int)); | |
8616 | int *segmenti_antiacceptor_knownpos = (int *) ALLOCA((querylength+1)*sizeof(int)); | |
8617 | int *segmenti_donor_knowni = (int *) ALLOCA((querylength+1)*sizeof(int)); | |
8618 | int *segmentj_acceptor_knowni = (int *) ALLOCA((querylength+1)*sizeof(int)); | |
8619 | int *segmentj_antidonor_knowni = (int *) ALLOCA((querylength+1)*sizeof(int)); | |
8620 | int *segmenti_antiacceptor_knowni = (int *) ALLOCA((querylength+1)*sizeof(int)); | |
8621 | #else | |
8622 | int mismatch_positions_left[MAX_READLENGTH], mismatch_positions_right[MAX_READLENGTH]; | |
8623 | int segmenti_donor_knownpos[MAX_READLENGTH+1], segmentj_acceptor_knownpos[MAX_READLENGTH+1], | |
8624 | segmentj_antidonor_knownpos[MAX_READLENGTH+1], segmenti_antiacceptor_knownpos[MAX_READLENGTH+1]; | |
8625 | int segmenti_donor_knowni[MAX_READLENGTH+1], segmentj_acceptor_knowni[MAX_READLENGTH+1], | |
8626 | segmentj_antidonor_knowni[MAX_READLENGTH+1], segmenti_antiacceptor_knowni[MAX_READLENGTH+1]; | |
8702 | if (querylength <= MAX_STACK_READLENGTH) { | |
8703 | mismatch_positions_left = (int *) ALLOCA(querylength*sizeof(int)); | |
8704 | mismatch_positions_right = (int *) ALLOCA(querylength*sizeof(int)); | |
8705 | segmenti_donor_knownpos = (int *) ALLOCA((querylength+1)*sizeof(int)); | |
8706 | segmentj_acceptor_knownpos = (int *) ALLOCA((querylength+1)*sizeof(int)); | |
8707 | segmentj_antidonor_knownpos = (int *) ALLOCA((querylength+1)*sizeof(int)); | |
8708 | segmenti_antiacceptor_knownpos = (int *) ALLOCA((querylength+1)*sizeof(int)); | |
8709 | segmenti_donor_knowni = (int *) ALLOCA((querylength+1)*sizeof(int)); | |
8710 | segmentj_acceptor_knowni = (int *) ALLOCA((querylength+1)*sizeof(int)); | |
8711 | segmentj_antidonor_knowni = (int *) ALLOCA((querylength+1)*sizeof(int)); | |
8712 | segmenti_antiacceptor_knowni = (int *) ALLOCA((querylength+1)*sizeof(int)); | |
8713 | } else { | |
8714 | mismatch_positions_left = (int *) MALLOC(querylength*sizeof(int)); | |
8715 | mismatch_positions_right = (int *) MALLOC(querylength*sizeof(int)); | |
8716 | segmenti_donor_knownpos = (int *) MALLOC((querylength+1)*sizeof(int)); | |
8717 | segmentj_acceptor_knownpos = (int *) MALLOC((querylength+1)*sizeof(int)); | |
8718 | segmentj_antidonor_knownpos = (int *) MALLOC((querylength+1)*sizeof(int)); | |
8719 | segmenti_antiacceptor_knownpos = (int *) MALLOC((querylength+1)*sizeof(int)); | |
8720 | segmenti_donor_knowni = (int *) MALLOC((querylength+1)*sizeof(int)); | |
8721 | segmentj_acceptor_knowni = (int *) MALLOC((querylength+1)*sizeof(int)); | |
8722 | segmentj_antidonor_knowni = (int *) MALLOC((querylength+1)*sizeof(int)); | |
8723 | segmenti_antiacceptor_knowni = (int *) MALLOC((querylength+1)*sizeof(int)); | |
8724 | } | |
8725 | #else | |
8726 | mismatch_positions_left = (int *) MALLOC(querylength*sizeof(int)); | |
8727 | mismatch_positions_right = (int *) MALLOC(querylength*sizeof(int)); | |
8728 | segmenti_donor_knownpos = (int *) MALLOC((querylength+1)*sizeof(int)); | |
8729 | segmentj_acceptor_knownpos = (int *) MALLOC((querylength+1)*sizeof(int)); | |
8730 | segmentj_antidonor_knownpos = (int *) MALLOC((querylength+1)*sizeof(int)); | |
8731 | segmenti_antiacceptor_knownpos = (int *) MALLOC((querylength+1)*sizeof(int)); | |
8732 | segmenti_donor_knowni = (int *) MALLOC((querylength+1)*sizeof(int)); | |
8733 | segmentj_acceptor_knowni = (int *) MALLOC((querylength+1)*sizeof(int)); | |
8734 | segmentj_antidonor_knowni = (int *) MALLOC((querylength+1)*sizeof(int)); | |
8735 | segmenti_antiacceptor_knowni = (int *) MALLOC((querylength+1)*sizeof(int)); | |
8627 | 8736 | #endif |
8628 | 8737 | |
8629 | 8738 | Chrpos_T max_distance; |
8847 | 8956 | Substring_genomicstart(Stage3end_substring_donor(hit)),Substring_genomicend(Stage3end_substring_donor(hit)), |
8848 | 8957 | Substring_match_length_orig(Stage3end_substring_acceptor(hit)), |
8849 | 8958 | Substring_genomicstart(Stage3end_substring_acceptor(hit)),Substring_genomicend(Stage3end_substring_acceptor(hit)), |
8850 | Stage3end_nmismatches_whole(hit),Substring_chimera_prob(Stage3end_substring_donor(hit)), | |
8851 | Substring_chimera_prob(Stage3end_substring_acceptor(hit)))); | |
8959 | Stage3end_nmismatches_whole(hit),Substring_siteD_prob(Stage3end_substring_donor(hit)), | |
8960 | Substring_siteA_prob(Stage3end_substring_acceptor(hit)))); | |
8852 | 8961 | if ((nmismatches = Stage3end_nmismatches_whole(hit)) < best_nmismatches) { |
8853 | 8962 | best_nmismatches = nmismatches; |
8854 | 8963 | } |
8864 | 8973 | if (Stage3end_nmismatches_whole(hit) <= best_nmismatches + LOCALSPLICING_NMATCHES_SLOP && |
8865 | 8974 | Stage3end_chimera_prob(hit) >= best_prob - LOCALSPLICING_PROB_SLOP) { |
8866 | 8975 | debug7(printf("accepting distance %d, probabilities %f and %f\n", |
8867 | Stage3end_distance(hit),Substring_chimera_prob(Stage3end_substring_donor(hit)), | |
8868 | Substring_chimera_prob(Stage3end_substring_acceptor(hit)))); | |
8976 | Stage3end_distance(hit),Substring_siteD_prob(Stage3end_substring_donor(hit)), | |
8977 | Substring_siteA_prob(Stage3end_substring_acceptor(hit)))); | |
8869 | 8978 | n_good_spliceends += 1; |
8870 | 8979 | accepted_hits = List_push(accepted_hits,(void *) hit); |
8871 | 8980 | } else { |
8881 | 8990 | if (Stage3end_nmismatches_whole(hit) <= best_nmismatches + LOCALSPLICING_NMATCHES_SLOP || |
8882 | 8991 | Stage3end_chimera_prob(hit) >= best_prob - LOCALSPLICING_PROB_SLOP) { |
8883 | 8992 | debug7(printf("accepting distance %d, probabilities %f and %f\n", |
8884 | Stage3end_distance(hit),Substring_chimera_prob(Stage3end_substring_donor(hit)), | |
8885 | Substring_chimera_prob(Stage3end_substring_acceptor(hit)))); | |
8993 | Stage3end_distance(hit),Substring_siteD_prob(Stage3end_substring_donor(hit)), | |
8994 | Substring_siteA_prob(Stage3end_substring_acceptor(hit)))); | |
8886 | 8995 | n_good_spliceends += 1; |
8887 | 8996 | accepted_hits = List_push(accepted_hits,(void *) hit); |
8888 | 8997 | } else { |
8949 | 9058 | for (k = i; k < j; k++) { |
8950 | 9059 | acceptor = Stage3end_substring_acceptor(hitarray[k]); |
8951 | 9060 | #ifdef LARGE_GENOMES |
8952 | ambcoords = Uint8list_push(ambcoords,Substring_splicecoord(acceptor)); | |
8953 | #else | |
8954 | ambcoords = Uintlist_push(ambcoords,Substring_splicecoord(acceptor)); | |
9061 | ambcoords = Uint8list_push(ambcoords,Substring_splicecoord_A(acceptor)); | |
9062 | #else | |
9063 | ambcoords = Uintlist_push(ambcoords,Substring_splicecoord_A(acceptor)); | |
8955 | 9064 | #endif |
8956 | 9065 | amb_knowni = Intlist_push(amb_knowni,-1); |
8957 | 9066 | amb_nmismatches = Intlist_push(amb_nmismatches,Substring_nmismatches_whole(acceptor)); |
8958 | amb_probs = Doublelist_push(amb_probs,Substring_chimera_prob(acceptor)); | |
9067 | amb_probs = Doublelist_push(amb_probs,Substring_siteA_prob(acceptor)); | |
8959 | 9068 | } |
8960 | 9069 | |
8961 | 9070 | nmismatches_acceptor = best_nmismatches - Substring_nmismatches_whole(donor); |
9020 | 9129 | for (k = i; k < j; k++) { |
9021 | 9130 | donor = Stage3end_substring_donor(hitarray[k]); |
9022 | 9131 | #ifdef LARGE_GENOMES |
9023 | ambcoords = Uint8list_push(ambcoords,Substring_splicecoord(donor)); | |
9024 | #else | |
9025 | ambcoords = Uintlist_push(ambcoords,Substring_splicecoord(donor)); | |
9132 | ambcoords = Uint8list_push(ambcoords,Substring_splicecoord_D(donor)); | |
9133 | #else | |
9134 | ambcoords = Uintlist_push(ambcoords,Substring_splicecoord_D(donor)); | |
9026 | 9135 | #endif |
9027 | 9136 | amb_knowni = Intlist_push(amb_knowni,-1); |
9028 | 9137 | amb_nmismatches = Intlist_push(amb_nmismatches,Substring_nmismatches_whole(donor)); |
9029 | amb_probs = Doublelist_push(amb_probs,Substring_chimera_prob(donor)); | |
9138 | amb_probs = Doublelist_push(amb_probs,Substring_siteD_prob(donor)); | |
9030 | 9139 | } |
9031 | 9140 | |
9032 | 9141 | nmismatches_donor = best_nmismatches - Substring_nmismatches_whole(acceptor); |
9080 | 9189 | Substring_genomicstart(Stage3end_substring_donor(hit)),Substring_genomicend(Stage3end_substring_donor(hit)), |
9081 | 9190 | Substring_match_length_orig(Stage3end_substring_acceptor(hit)), |
9082 | 9191 | Substring_genomicstart(Stage3end_substring_acceptor(hit)),Substring_genomicend(Stage3end_substring_acceptor(hit)), |
9083 | Stage3end_nmismatches_whole(hit),Substring_chimera_prob(Stage3end_substring_donor(hit)), | |
9084 | Substring_chimera_prob(Stage3end_substring_acceptor(hit)))); | |
9192 | Stage3end_nmismatches_whole(hit),Substring_siteD_prob(Stage3end_substring_donor(hit)), | |
9193 | Substring_siteA_prob(Stage3end_substring_acceptor(hit)))); | |
9085 | 9194 | if ((nmismatches = Stage3end_nmismatches_whole(hit)) < best_nmismatches) { |
9086 | 9195 | best_nmismatches = nmismatches; |
9087 | 9196 | } |
9099 | 9208 | debug7(printf("accepting distance %d, donor length %d and acceptor length %d, probabilities %f and %f\n", |
9100 | 9209 | Stage3end_distance(hit),Substring_match_length_orig(Stage3end_substring_donor(hit)), |
9101 | 9210 | Substring_match_length_orig(Stage3end_substring_acceptor(hit)), |
9102 | Substring_chimera_prob(Stage3end_substring_donor(hit)), | |
9103 | Substring_chimera_prob(Stage3end_substring_acceptor(hit)))); | |
9211 | Substring_siteD_prob(Stage3end_substring_donor(hit)), | |
9212 | Substring_siteA_prob(Stage3end_substring_acceptor(hit)))); | |
9104 | 9213 | n_good_spliceends += 1; |
9105 | 9214 | accepted_hits = List_push(accepted_hits,(void *) hit); |
9106 | 9215 | } else { |
9118 | 9227 | debug7(printf("accepting distance %d, donor length %d and acceptor length %d, probabilities %f and %f\n", |
9119 | 9228 | Stage3end_distance(hit),Substring_match_length_orig(Stage3end_substring_donor(hit)), |
9120 | 9229 | Substring_match_length_orig(Stage3end_substring_acceptor(hit)), |
9121 | Substring_chimera_prob(Stage3end_substring_donor(hit)), | |
9122 | Substring_chimera_prob(Stage3end_substring_acceptor(hit)))); | |
9230 | Substring_siteD_prob(Stage3end_substring_donor(hit)), | |
9231 | Substring_siteA_prob(Stage3end_substring_acceptor(hit)))); | |
9123 | 9232 | n_good_spliceends += 1; |
9124 | 9233 | accepted_hits = List_push(accepted_hits,(void *) hit); |
9125 | 9234 | } else { |
9186 | 9295 | for (k = i; k < j; k++) { |
9187 | 9296 | acceptor = Stage3end_substring_acceptor(hitarray[k]); |
9188 | 9297 | #ifdef LARGE_GENOMES |
9189 | ambcoords = Uint8list_push(ambcoords,Substring_splicecoord(acceptor)); | |
9190 | #else | |
9191 | ambcoords = Uintlist_push(ambcoords,Substring_splicecoord(acceptor)); | |
9298 | ambcoords = Uint8list_push(ambcoords,Substring_splicecoord_A(acceptor)); | |
9299 | #else | |
9300 | ambcoords = Uintlist_push(ambcoords,Substring_splicecoord_A(acceptor)); | |
9192 | 9301 | #endif |
9193 | 9302 | amb_knowni = Intlist_push(amb_knowni,-1); |
9194 | 9303 | amb_nmismatches = Intlist_push(amb_nmismatches,Substring_nmismatches_whole(acceptor)); |
9195 | amb_probs = Doublelist_push(amb_probs,Substring_chimera_prob(acceptor)); | |
9304 | amb_probs = Doublelist_push(amb_probs,Substring_siteA_prob(acceptor)); | |
9196 | 9305 | } |
9197 | 9306 | |
9198 | 9307 | nmismatches_acceptor = best_nmismatches - Substring_nmismatches_whole(donor); |
9256 | 9365 | for (k = i; k < j; k++) { |
9257 | 9366 | donor = Stage3end_substring_donor(hitarray[k]); |
9258 | 9367 | #ifdef LARGE_GENOMES |
9259 | ambcoords = Uint8list_push(ambcoords,Substring_splicecoord(donor)); | |
9260 | #else | |
9261 | ambcoords = Uintlist_push(ambcoords,Substring_splicecoord(donor)); | |
9368 | ambcoords = Uint8list_push(ambcoords,Substring_splicecoord_D(donor)); | |
9369 | #else | |
9370 | ambcoords = Uintlist_push(ambcoords,Substring_splicecoord_D(donor)); | |
9262 | 9371 | #endif |
9263 | 9372 | amb_knowni = Intlist_push(amb_knowni,-1); |
9264 | 9373 | amb_nmismatches = Intlist_push(amb_nmismatches,Substring_nmismatches_whole(donor)); |
9265 | amb_probs = Doublelist_push(amb_probs,Substring_chimera_prob(donor)); | |
9374 | amb_probs = Doublelist_push(amb_probs,Substring_siteD_prob(donor)); | |
9266 | 9375 | } |
9267 | 9376 | |
9268 | 9377 | nmismatches_donor = best_nmismatches - Substring_nmismatches_whole(acceptor); |
9308 | 9417 | } |
9309 | 9418 | } |
9310 | 9419 | |
9420 | #ifdef HAVE_ALLOCA | |
9421 | if (querylength <= MAX_STACK_READLENGTH) { | |
9422 | FREEA(mismatch_positions_left); | |
9423 | FREEA(mismatch_positions_right); | |
9424 | FREEA(segmenti_donor_knownpos); | |
9425 | FREEA(segmentj_acceptor_knownpos); | |
9426 | FREEA(segmentj_antidonor_knownpos); | |
9427 | FREEA(segmenti_antiacceptor_knownpos); | |
9428 | FREEA(segmenti_donor_knowni); | |
9429 | FREEA(segmentj_acceptor_knowni); | |
9430 | FREEA(segmentj_antidonor_knowni); | |
9431 | FREEA(segmenti_antiacceptor_knowni); | |
9432 | } else { | |
9433 | FREE(mismatch_positions_left); | |
9434 | FREE(mismatch_positions_right); | |
9435 | FREE(segmenti_donor_knownpos); | |
9436 | FREE(segmentj_acceptor_knownpos); | |
9437 | FREE(segmentj_antidonor_knownpos); | |
9438 | FREE(segmenti_antiacceptor_knownpos); | |
9439 | FREE(segmenti_donor_knowni); | |
9440 | FREE(segmentj_acceptor_knowni); | |
9441 | FREE(segmentj_antidonor_knowni); | |
9442 | FREE(segmenti_antiacceptor_knowni); | |
9443 | } | |
9444 | #else | |
9445 | FREE(mismatch_positions_left); | |
9446 | FREE(mismatch_positions_right); | |
9447 | FREE(segmenti_donor_knownpos); | |
9448 | FREE(segmentj_acceptor_knownpos); | |
9449 | FREE(segmentj_antidonor_knownpos); | |
9450 | FREE(segmenti_antiacceptor_knownpos); | |
9451 | FREE(segmenti_donor_knowni); | |
9452 | FREE(segmentj_acceptor_knowni); | |
9453 | FREE(segmentj_antidonor_knowni); | |
9454 | FREE(segmenti_antiacceptor_knowni); | |
9455 | #endif | |
9456 | ||
9311 | 9457 | debug(printf("Finished find_singlesplices_minus with %d hits and %d lowprob\n", |
9312 | 9458 | List_length(hits),List_length(*lowprob))); |
9313 | 9459 | |
9401 | 9547 | #endif |
9402 | 9548 | |
9403 | 9549 | |
9404 | #if 0 | |
9405 | static List_T | |
9406 | find_doublesplices (int *found_score, List_T hits, List_T *lowprob, | |
9407 | Segment_T *spliceable, int nspliceable, struct Segment_T *segments, | |
9408 | char *queryptr, int querylength, int query_lastpos, Compress_T query_compress, | |
9409 | Chrpos_T max_distance, int splicing_penalty, int min_shortend, | |
9410 | int max_mismatches_allowed, bool pairedp, bool first_read_p, | |
9411 | bool plusp, int genestrand, bool subs_or_indels_p) { | |
9412 | int j, j1, j2, joffset, jj; | |
9413 | ||
9414 | Segment_T segmenti, segmentj, segmentm, segmenti_start, segmentj_end, *ptr; | |
9415 | List_T potentiali, potentialj, q, r; | |
9416 | Univcoord_T segmenti_left, segmentj_left, segmentm_left; | |
9417 | int segmenti_donor_nknown, segmentj_acceptor_nknown, | |
9418 | segmentj_antidonor_nknown, segmenti_antiacceptor_nknown, | |
9419 | segmentm_donor_nknown, segmentm_acceptor_nknown, | |
9420 | segmentm_antidonor_nknown, segmentm_antiacceptor_nknown; | |
9421 | ||
9422 | #ifdef HAVE_ALLOCA | |
9423 | int *segmenti_donor_knownpos = (int *) ALLOCA((querylength+1)*sizeof(int)); | |
9424 | int *segmentj_acceptor_knownpos = (int *) ALLOCA((querylength+1)*sizeof(int)); | |
9425 | int *segmentj_antidonor_knownpos = (int *) ALLOCA((querylength+1)*sizeof(int)); | |
9426 | int *segmenti_antiacceptor_knownpos = (int *) ALLOCA((querylength+1)*sizeof(int)); | |
9427 | int *segmentm_donor_knownpos = (int *) ALLOCA((querylength+1)*sizeof(int)); | |
9428 | int *segmentm_acceptor_knownpos = (int *) ALLOCA((querylength+1)*sizeof(int)); | |
9429 | int *segmentm_antidonor_knownpos = (int *) ALLOCA((querylength+1)*sizeof(int)); | |
9430 | int *segmentm_antiacceptor_knownpos = (int *) ALLOCA((querylength+1)*sizeof(int)); | |
9431 | int *segmenti_donor_knowni = (int *) ALLOCA((querylength+1)*sizeof(int)); | |
9432 | int *segmentj_acceptor_knowni = (int *) ALLOCA((querylength+1)*sizeof(int)); | |
9433 | int *segmentj_antidonor_knowni = (int *) ALLOCA((querylength+1)*sizeof(int)); | |
9434 | int *segmenti_antiacceptor_knowni = (int *) ALLOCA((querylength+1)*sizeof(int)); | |
9435 | int *segmentm_donor_knowni = (int *) ALLOCA((querylength+1)*sizeof(int)); | |
9436 | int *segmentm_acceptor_knowni = (int *) ALLOCA((querylength+1)*sizeof(int)); | |
9437 | int *segmentm_antidonor_knowni = (int *) ALLOCA((querylength+1)*sizeof(int)); | |
9438 | int *segmentm_antiacceptor_knowni = (int *) ALLOCA((querylength+1)*sizeof(int)); | |
9439 | #else | |
9440 | int segmenti_donor_knownpos[MAX_READLENGTH+1], segmentj_acceptor_knownpos[MAX_READLENGTH+1], | |
9441 | segmentj_antidonor_knownpos[MAX_READLENGTH+1], segmenti_antiacceptor_knownpos[MAX_READLENGTH+1], | |
9442 | segmentm_donor_knownpos[MAX_READLENGTH+1], segmentm_acceptor_knownpos[MAX_READLENGTH+1], | |
9443 | segmentm_antidonor_knownpos[MAX_READLENGTH+1], segmentm_antiacceptor_knownpos[MAX_READLENGTH+1]; | |
9444 | int segmenti_donor_knowni[MAX_READLENGTH+1], segmentj_acceptor_knowni[MAX_READLENGTH+1], | |
9445 | segmentj_antidonor_knowni[MAX_READLENGTH+1], segmenti_antiacceptor_knowni[MAX_READLENGTH+1], | |
9446 | segmentm_donor_knowni[MAX_READLENGTH+1], segmentm_acceptor_knowni[MAX_READLENGTH+1], | |
9447 | segmentm_antidonor_knowni[MAX_READLENGTH+1], segmentm_antiacceptor_knowni[MAX_READLENGTH+1]; | |
9448 | #endif | |
9449 | ||
9450 | #ifdef LARGE_GENOMES | |
9451 | Uint8list_T donor_ambcoords, acceptor_ambcoords, ambcoords_donor, ambcoords_acceptor; | |
9452 | #else | |
9453 | Uintlist_T donor_ambcoords, acceptor_ambcoords, ambcoords_donor, ambcoords_acceptor; | |
9454 | #endif | |
9455 | Intlist_T splicesites_i_left, splicesites_i_right; | |
9456 | Intlist_T nmismatches_list_left, nmismatches_list_right; | |
9457 | bool ambp_left, ambp_right; | |
9458 | int sensedir; | |
9459 | /* int *floors_from_neg3, *floors_to_pos3; */ | |
9460 | ||
9461 | int nmismatches_shortexon_left, nmismatches_shortexon_middle, nmismatches_shortexon_right; | |
9462 | int amb_length_donor, amb_length_acceptor; | |
9463 | int best_left_j, best_right_j; | |
9464 | bool shortexon_orig_plusp, shortexon_orig_minusp, saw_antidonor_p, saw_acceptor_p; | |
9465 | int leftpos, rightpos; | |
9466 | Substring_T donor, acceptor, shortexon; | |
9467 | ||
9468 | int nhits_local /*= 0*/, npotential_left, npotential_right; | |
9469 | int donor_length, acceptor_length; | |
9470 | List_T accepted_hits, rejected_hits, single_ambig_hits; | |
9471 | List_T spliceends, p; | |
9472 | Stage3end_T hit, *hitarray; | |
9473 | int best_nmismatches, nmismatches; | |
9474 | int n_good_spliceends, n, i, k; | |
9475 | double best_prob, prob; | |
9476 | Univcoord_T lastpos; | |
9477 | Intlist_T donor_amb_knowni, acceptor_amb_knowni, donor_amb_nmismatches, acceptor_amb_nmismatches; | |
9478 | Doublelist_T donor_amb_probs, acceptor_amb_probs, probs_donor, probs_acceptor; | |
9479 | ||
9480 | ||
9481 | debug(printf("*** Starting find_known_doublesplices on %d segments ***\n",nspliceable)); | |
9482 | debug(printf("Initially have %d hits\n",List_length(hits))); | |
9483 | ||
9484 | /* floors_from_neg3 = floors->scorefrom[-index1interval]; */ | |
9485 | /* floors_to_pos3 = floors->scoreto[query_lastpos+index1interval]; */ | |
9486 | ||
9487 | for (ptr = spliceable; ptr < &(spliceable[nspliceable]); ptr++) { | |
9488 | segmentm = *ptr; | |
9489 | if (1 || segmentm->diagonal < (Univcoord_T) -1) { /* No markers were stored in spliceable */ | |
9490 | segmentm_left = segmentm->diagonal - querylength; | |
9491 | ||
9492 | shortexon_orig_plusp = shortexon_orig_minusp = false; | |
9493 | saw_acceptor_p = saw_antidonor_p = false; | |
9494 | ||
9495 | segmentm_donor_nknown = 0; | |
9496 | segmentm_acceptor_nknown = 0; | |
9497 | segmentm_antidonor_nknown = 0; | |
9498 | segmentm_antiacceptor_nknown = 0; | |
9499 | ||
9500 | if ((joffset = segmentm->splicesites_i) >= 0) { | |
9501 | j = joffset; | |
9502 | while (j < nsplicesites && splicesites[j] < segmentm->diagonal) { | |
9503 | if (splicetypes[j] == DONOR) { | |
9504 | debug4k(printf("Setting known donor %d for segmentm at %llu\n",j,(unsigned long long) splicesites[j])); | |
9505 | segmentm_donor_knownpos[segmentm_donor_nknown] = splicesites[j] - segmentm_left; | |
9506 | segmentm_donor_knowni[segmentm_donor_nknown++] = j; | |
9507 | if (saw_acceptor_p == true) { | |
9508 | /* acceptor...donor */ | |
9509 | shortexon_orig_plusp = true; | |
9510 | } | |
9511 | } else if (splicetypes[j] == ANTIACCEPTOR) { | |
9512 | debug4k(printf("Setting known antiacceptor %d for segmentm at %llu\n",j,(unsigned long long) splicesites[j])); | |
9513 | segmentm_antiacceptor_knownpos[segmentm_antiacceptor_nknown] = splicesites[j] - segmentm_left; | |
9514 | segmentm_antiacceptor_knowni[segmentm_antiacceptor_nknown++] = j; | |
9515 | if (saw_antidonor_p == true) { | |
9516 | /* antidonor...antiacceptor */ | |
9517 | shortexon_orig_minusp = true; | |
9518 | } | |
9519 | } else if (splicetypes[j] == ACCEPTOR) { | |
9520 | debug4k(printf("Saw known acceptor at %llu\n",(unsigned long long) splicesites[j])); | |
9521 | segmentm_acceptor_knownpos[segmentm_acceptor_nknown] = splicesites[j] - segmentm_left; | |
9522 | segmentm_acceptor_knowni[segmentm_acceptor_nknown++] = j; | |
9523 | saw_acceptor_p = true; | |
9524 | } else if (splicetypes[j] == ANTIDONOR) { | |
9525 | debug4k(printf("Saw known antidonor at %llu\n",(unsigned long long) splicesites[j])); | |
9526 | segmentm_antidonor_knownpos[segmentm_antidonor_nknown] = splicesites[j] - segmentm_left; | |
9527 | segmentm_antidonor_knowni[segmentm_antidonor_nknown++] = j; | |
9528 | saw_antidonor_p = true; | |
9529 | } | |
9530 | j++; | |
9531 | } | |
9532 | } | |
9533 | ||
9534 | /* Novel splicing. Do not alter j. */ | |
9535 | /* Still necessary to check segmentm querypos to achieve speed */ | |
9536 | if (novelsplicingp && | |
9537 | segmentm->querypos3 >= index1part && segmentm->querypos5 <= query_lastpos - index1part && | |
9538 | segmentm->left_splice_p == true && segmentm->right_splice_p == true) { | |
9539 | debug4d(printf("segment diagonal %llu, querypos %d..%d\n", | |
9540 | (unsigned long long) segmentm->diagonal,segmentm->querypos5,segmentm->querypos3)); | |
9541 | ||
9542 | spliceends = (List_T) NULL; | |
9543 | ||
9544 | /* Identify potential segmenti for segmentm */ | |
9545 | segmenti_start = segmentm-1; | |
9546 | while ( | |
9547 | /* Cannot use marker segments going leftward */ | |
9548 | segmenti_start >= &(segments[0]) && | |
9549 | segmenti_start->diagonal < (Univcoord_T) -1 && /* Needs to be next criterion, since we initialize only segments[0]->diagonal */ | |
9550 | segmenti_start->chrnum == segmentm->chrnum && | |
9551 | segmentm->diagonal <= segmenti_start->diagonal + max_distance) { | |
9552 | segmenti_start--; | |
9553 | } | |
9554 | ||
9555 | /* Identify potential segmentj for segmentm */ | |
9556 | segmentj_end = segmentm+1; | |
9557 | while ( | |
9558 | #ifdef NO_MARKER_SEGMENTS | |
9559 | segmentj_end < &(segments[nsegments]) && segmentj_end->chrnum == segmentm->chrnum && | |
9560 | #endif | |
9561 | segmentj_end->diagonal <= segmentm->diagonal + max_distance) { | |
9562 | segmentj_end++; | |
9563 | } | |
9564 | ||
9565 | potentiali = (List_T) NULL; | |
9566 | potentialj = (List_T) NULL; | |
9567 | npotential_left = 0; | |
9568 | npotential_right = 0; | |
9569 | if ((segmentm - segmenti_start) * (segmentj_end - segmentm) >= MAX_LOCALSPLICING_POTENTIAL) { | |
9570 | /* Too many to check */ | |
9571 | /* segmenti_start = segmentm-1 - MAX_LOCALSPLICING_POTENTIAL; */ | |
9572 | /* segmentj_end = segmentm+1 + MAX_LOCALSPLICING_POTENTIAL; */ | |
9573 | segmenti = segmenti_start; /* Don't process any */ | |
9574 | segmentj = segmentj_end; /* Don't process any */ | |
9575 | } else { | |
9576 | segmenti = segmentm-1; | |
9577 | segmentj = segmentm+1; | |
9578 | } | |
9579 | ||
9580 | for ( ; segmenti > segmenti_start; segmenti--) { | |
9581 | debug4d(printf("local left? diagonal %llu, querypos %d..%d => diagonal %llu, querypos %d..%d\n", | |
9582 | (unsigned long long) segmenti->diagonal,segmenti->querypos5,segmenti->querypos3, | |
9583 | (unsigned long long) segmentm->diagonal,segmentm->querypos5,segmentm->querypos3)); | |
9584 | /* i5 i3 m5 m3 */ | |
9585 | assert(segmenti->diagonal < segmentm->diagonal); | |
9586 | if (segmenti->leftmost < 0) { | |
9587 | /* Failed outer floor test in find_singlesplices */ | |
9588 | } else if (plusp == true && segmenti->querypos3 >= segmentm->querypos5) { | |
9589 | debug4d(printf("Bad querypos\n")); | |
9590 | } else if (plusp == false && segmentm->querypos3 >= segmenti->querypos5) { | |
9591 | debug4d(printf("Bad querypos\n")); | |
9592 | } else if (segmenti->diagonal + min_intronlength > segmentm->diagonal) { | |
9593 | debug4d(printf("Too short\n")); | |
9594 | } else { | |
9595 | potentiali = List_push(potentiali,(void *) segmenti); | |
9596 | npotential_left++; | |
9597 | debug4d(printf("Potential left #%d: %llu\n",npotential_left,(unsigned long long) segmenti->diagonal)); | |
9598 | } | |
9599 | } | |
9600 | ||
9601 | for ( ; segmentj < segmentj_end; segmentj++) { | |
9602 | debug4d(printf("local right? diagonal %llu, querypos %d..%d => diagonal %llu, querypos %d..%d\n", | |
9603 | (unsigned long long) segmentm->diagonal,segmentm->querypos5,segmentm->querypos3, | |
9604 | (unsigned long long) segmentj->diagonal,segmentj->querypos5,segmentj->querypos3)); | |
9605 | /* m5 m3 j5 j3 */ | |
9606 | assert(segmentm->diagonal < segmentj->diagonal); | |
9607 | if (segmentj->rightmost < 0) { | |
9608 | /* Failed outer floor test in find_singlesplices */ | |
9609 | } else if (plusp == true && segmentm->querypos3 >= segmentj->querypos5) { | |
9610 | debug4d(printf("Bad querypos\n")); | |
9611 | } else if (plusp == false && segmentj->querypos3 >= segmentm->querypos5) { | |
9612 | debug4d(printf("Bad querypos\n")); | |
9613 | } else if (segmentm->diagonal + min_intronlength > segmentj->diagonal) { | |
9614 | debug4d(printf("Too short\n")); | |
9615 | } else { | |
9616 | potentialj = List_push(potentialj,(void *) segmentj); | |
9617 | npotential_right++; | |
9618 | debug4d(printf("Potential right #%d: %llu\n",npotential_right,(unsigned long long) segmentj->diagonal)); | |
9619 | } | |
9620 | } | |
9621 | ||
9622 | if (npotential_left > 0 && npotential_right > 0) { | |
9623 | segmentm_donor_knownpos[segmentm_donor_nknown] = querylength; | |
9624 | segmentm_acceptor_knownpos[segmentm_acceptor_nknown] = querylength; | |
9625 | segmentm_antidonor_knownpos[segmentm_antidonor_nknown] = querylength; | |
9626 | segmentm_antiacceptor_knownpos[segmentm_antiacceptor_nknown] = querylength; | |
9627 | ||
9628 | for (q = potentiali; q != NULL; q = List_next(q)) { | |
9629 | segmenti = (Segment_T) List_head(q); | |
9630 | segmenti_left = segmenti->diagonal - querylength; | |
9631 | ||
9632 | /* Set known sites for segmenti */ | |
9633 | segmenti_donor_nknown = 0; | |
9634 | segmenti_antiacceptor_nknown = 0; | |
9635 | if ((jj = segmenti->splicesites_i) >= 0) { | |
9636 | while (jj < nsplicesites && splicesites[jj] < segmenti->diagonal) { | |
9637 | if (splicetypes[jj] == DONOR) { | |
9638 | debug4d(printf("Setting known donor %d for segmenti at %llu\n",jj,(unsigned long long) splicesites[jj])); | |
9639 | segmenti_donor_knownpos[segmenti_donor_nknown] = splicesites[jj] - segmenti_left; | |
9640 | segmenti_donor_knowni[segmenti_donor_nknown++] = jj; | |
9641 | } else if (splicetypes[jj] == ANTIACCEPTOR) { | |
9642 | debug4d(printf("Setting known antiacceptor %d for segmenti at %llu\n",jj,(unsigned long long) splicesites[jj])); | |
9643 | segmenti_antiacceptor_knownpos[segmenti_antiacceptor_nknown] = splicesites[jj] - segmenti_left; | |
9644 | segmenti_antiacceptor_knowni[segmenti_antiacceptor_nknown++] = jj; | |
9645 | } | |
9646 | jj++; | |
9647 | } | |
9648 | } | |
9649 | segmenti_donor_knownpos[segmenti_donor_nknown] = querylength; | |
9650 | segmenti_antiacceptor_knownpos[segmenti_antiacceptor_nknown] = querylength; | |
9651 | ||
9652 | ||
9653 | for (r = potentialj; r != NULL; r = List_next(r)) { | |
9654 | segmentj = (Segment_T) List_head(r); | |
9655 | ||
9656 | debug4d(printf("Doublesplice span test (%d mismatches allowed): %d mismatches found from leftmost %d to j.rightmost %d\n", | |
9657 | max_mismatches_allowed, | |
9658 | Genome_count_mismatches_substring(query_compress,segmentm_left, | |
9659 | /*pos5*/segmenti->leftmost,/*pos3*/segmentj->rightmost, | |
9660 | plusp,genestrand,first_read_p), | |
9661 | segmenti->leftmost,segmentj->rightmost)); | |
9662 | ||
9663 | if (segmenti->leftmost >= segmentj->rightmost) { | |
9664 | debug4d(printf("Double splice is not possible with pos5 %d > pos3 %d\n", | |
9665 | segmenti->leftmost,segmentj->rightmost)); | |
9666 | } else if (Genome_count_mismatches_limit(query_compress,segmentm_left, | |
9667 | /*pos5*/segmenti->leftmost,/*pos3*/segmentj->rightmost, | |
9668 | max_mismatches_allowed,plusp,genestrand) <= max_mismatches_allowed) { | |
9669 | debug4d(printf("Double splice is possible\n")); | |
9670 | segmentj_left = segmentj->diagonal - querylength; | |
9671 | ||
9672 | /* Set known sites for segmentj */ | |
9673 | segmentj_acceptor_nknown = 0; | |
9674 | segmentj_antidonor_nknown = 0; | |
9675 | if ((jj = segmentj->splicesites_i) >= 0) { | |
9676 | while (jj < nsplicesites && splicesites[jj] < segmentj->diagonal) { | |
9677 | if (splicetypes[jj] == ACCEPTOR) { | |
9678 | debug4d(printf("Setting known acceptor %d for segmentj at %llu\n",jj,(unsigned long long) splicesites[jj])); | |
9679 | segmentj_acceptor_knownpos[segmentj_acceptor_nknown] = splicesites[jj] - segmentj_left; | |
9680 | segmentj_acceptor_knowni[segmentj_acceptor_nknown++] = jj; | |
9681 | } else if (splicetypes[jj] == ANTIDONOR) { | |
9682 | debug4d(printf("Setting known antidonor %d for segmentj at %llu\n",jj,(unsigned long long) splicesites[jj])); | |
9683 | segmentj_antidonor_knownpos[segmentj_antidonor_nknown] = splicesites[jj] - segmentj_left; | |
9684 | segmentj_antidonor_knowni[segmentj_antidonor_nknown++] = jj; | |
9685 | } | |
9686 | jj++; | |
9687 | } | |
9688 | } | |
9689 | segmentj_acceptor_knownpos[segmentj_acceptor_nknown] = querylength; | |
9690 | segmentj_antidonor_knownpos[segmentj_antidonor_nknown] = querylength; | |
9691 | ||
9692 | debug4d(printf(" => checking for double splice: Splice_solve_double\n")); | |
9693 | spliceends = Splice_solve_double(&(*found_score),&nhits_local,spliceends,&(*lowprob), | |
9694 | &segmenti->usedp,&segmentm->usedp,&segmentj->usedp, | |
9695 | /*segmenti_left*/segmenti->diagonal - querylength, | |
9696 | /*segmentm_left*/segmentm->diagonal - querylength, | |
9697 | /*segmentj_left*/segmentj->diagonal - querylength, | |
9698 | segmenti->chrnum,segmenti->chroffset,segmenti->chrhigh,segmenti->chrlength, | |
9699 | segmentm->chrnum,segmentm->chroffset,segmentm->chrhigh,segmentm->chrlength, | |
9700 | segmentj->chrnum,segmentj->chroffset,segmentj->chrhigh,segmentj->chrlength, | |
9701 | querylength,query_compress, | |
9702 | segmenti_donor_knownpos,segmentm_acceptor_knownpos,segmentm_donor_knownpos,segmentj_acceptor_knownpos, | |
9703 | segmentj_antidonor_knownpos,segmentm_antiacceptor_knownpos,segmentm_antidonor_knownpos,segmenti_antiacceptor_knownpos, | |
9704 | segmenti_donor_knowni,segmentm_acceptor_knowni,segmentm_donor_knowni,segmentj_acceptor_knowni, | |
9705 | segmentj_antidonor_knowni,segmentm_antiacceptor_knowni,segmentm_antidonor_knowni,segmenti_antiacceptor_knowni, | |
9706 | segmenti_donor_nknown,segmentm_acceptor_nknown,segmentm_donor_nknown,segmentj_acceptor_nknown, | |
9707 | segmentj_antidonor_nknown,segmentm_antiacceptor_nknown,segmentm_antidonor_nknown,segmenti_antiacceptor_nknown, | |
9708 | splicing_penalty,max_mismatches_allowed,plusp,genestrand, | |
9709 | subs_or_indels_p,/*sarrayp*/false); | |
9710 | } | |
9711 | } | |
9712 | } | |
9713 | } | |
9714 | ||
9715 | List_free(&potentialj); | |
9716 | List_free(&potentiali); | |
9717 | ||
9718 | /* Process results for segmentm. */ | |
9719 | if (spliceends != NULL) { | |
9720 | best_nmismatches = querylength; | |
9721 | best_prob = 0.0; | |
9722 | for (p = spliceends; p != NULL; p = List_next(p)) { | |
9723 | hit = (Stage3end_T) List_head(p); | |
9724 | debug7(printf("analyzing distance %d, nmismatches %d, probability %f\n", | |
9725 | Stage3end_distance(hit),Stage3end_nmismatches_whole(hit), | |
9726 | Stage3end_shortexon_prob(hit))); | |
9727 | if ((nmismatches = Stage3end_nmismatches_whole(hit)) < best_nmismatches) { | |
9728 | best_nmismatches = nmismatches; | |
9729 | } | |
9730 | if ((prob = Stage3end_shortexon_prob(hit)) > best_prob) { | |
9731 | best_prob = prob; | |
9732 | } | |
9733 | } | |
9734 | ||
9735 | n_good_spliceends = 0; | |
9736 | accepted_hits = rejected_hits = (List_T) NULL; | |
9737 | for (p = spliceends; p != NULL; p = List_next(p)) { | |
9738 | hit = (Stage3end_T) List_head(p); | |
9739 | if (Stage3end_nmismatches_whole(hit) <= best_nmismatches + LOCALSPLICING_NMATCHES_SLOP && | |
9740 | (Stage3end_shortexon_prob(hit) >= best_prob - LOCALSPLICING_PROB_SLOP)) { | |
9741 | debug7(printf("accepting distance %d, nmismatches %d, probability %f\n", | |
9742 | Stage3end_distance(hit),Stage3end_nmismatches_whole(hit), | |
9743 | Stage3end_shortexon_prob(hit))); | |
9744 | n_good_spliceends += 1; | |
9745 | accepted_hits = List_push(accepted_hits,(void *) hit); | |
9746 | } else { | |
9747 | rejected_hits = List_push(rejected_hits,(void *) hit); | |
9748 | } | |
9749 | } | |
9750 | ||
9751 | if (n_good_spliceends == 0) { | |
9752 | /* Conjunction is too strict. Allow for disjunction instead. */ | |
9753 | List_free(&rejected_hits); | |
9754 | for (p = spliceends; p != NULL; p = List_next(p)) { | |
9755 | hit = (Stage3end_T) List_head(p); | |
9756 | if (Stage3end_nmismatches_whole(hit) <= best_nmismatches + LOCALSPLICING_NMATCHES_SLOP || | |
9757 | (Stage3end_shortexon_prob(hit) >= best_prob - LOCALSPLICING_PROB_SLOP)) { | |
9758 | debug7(printf("accepting distance %d, nmismatches %d, probability %f\n", | |
9759 | Stage3end_distance(hit),Stage3end_nmismatches_whole(hit), | |
9760 | Stage3end_shortexon_prob(hit))); | |
9761 | n_good_spliceends += 1; | |
9762 | accepted_hits = List_push(accepted_hits,(void *) hit); | |
9763 | } else { | |
9764 | rejected_hits = List_push(rejected_hits,(void *) hit); | |
9765 | } | |
9766 | } | |
9767 | } | |
9768 | ||
9769 | for (p = rejected_hits; p != NULL; p = List_next(p)) { | |
9770 | hit = (Stage3end_T) List_head(p); | |
9771 | Stage3end_free(&hit); | |
9772 | } | |
9773 | List_free(&rejected_hits); | |
9774 | List_free(&spliceends); | |
9775 | ||
9776 | if (n_good_spliceends == 1) { | |
9777 | hits = List_push(hits,List_head(accepted_hits)); | |
9778 | List_free(&accepted_hits); | |
9779 | ||
9780 | } else { | |
9781 | /* 5. Multiple hits, shortexon */ | |
9782 | debug7(printf("multiple splice hits, shortexon\n")); | |
9783 | ||
9784 | /* Process multiple double ambiguous first */ | |
9785 | hitarray = (Stage3end_T *) List_to_array_n(&n,accepted_hits); | |
9786 | qsort(hitarray,n,sizeof(Stage3end_T),substringD_match_length_cmp); | |
9787 | List_free(&accepted_hits); | |
9788 | single_ambig_hits = (List_T) NULL; | |
9789 | ||
9790 | i = 0; | |
9791 | while (i < n) { | |
9792 | hit = hitarray[i]; | |
9793 | donor = Stage3end_substringD(hit); | |
9794 | donor_length = Substring_match_length_orig(donor); | |
9795 | acceptor = Stage3end_substringA(hit); | |
9796 | acceptor_length = Substring_match_length_orig(acceptor); | |
9797 | j = i + 1; | |
9798 | while (j < n && Substring_match_length_orig(Stage3end_substringD(hitarray[j])) == donor_length && | |
9799 | Substring_match_length_orig(Stage3end_substringA(hitarray[j])) == acceptor_length) { | |
9800 | j++; | |
9801 | } | |
9802 | if (j == i + 1) { | |
9803 | /* Save for later analysis */ | |
9804 | single_ambig_hits = List_push(single_ambig_hits,(void *) hit); | |
9805 | } else { | |
9806 | donor_ambcoords = acceptor_ambcoords = NULL; | |
9807 | donor_amb_knowni = acceptor_amb_knowni = (Intlist_T) NULL; | |
9808 | donor_amb_nmismatches = acceptor_amb_nmismatches = (Intlist_T) NULL; | |
9809 | donor_amb_probs = acceptor_amb_probs = (Doublelist_T) NULL; | |
9810 | ||
9811 | qsort(&(hitarray[i]),j-i,sizeof(Stage3end_T),Stage3end_shortexon_substringD_cmp); | |
9812 | donor = Stage3end_substringD(hitarray[i]); | |
9813 | #ifdef LARGE_GENOMES | |
9814 | donor_ambcoords = Uint8list_push(donor_ambcoords,Substring_splicecoord(donor)); | |
9815 | #else | |
9816 | donor_ambcoords = Uintlist_push(donor_ambcoords,Substring_splicecoord(donor)); | |
9817 | #endif | |
9818 | donor_amb_knowni = Intlist_push(donor_amb_knowni,-1); | |
9819 | donor_amb_nmismatches = Intlist_push(donor_amb_nmismatches,Substring_nmismatches_whole(donor)); | |
9820 | donor_amb_probs = Doublelist_push(donor_amb_probs,Substring_chimera_prob(donor)); | |
9821 | ||
9822 | lastpos = Substring_left_genomicseg(donor); | |
9823 | for (k = i + 1; k < j; k++) { | |
9824 | donor = Stage3end_substringD(hitarray[k]); | |
9825 | if (Substring_left_genomicseg(donor) != lastpos) { | |
9826 | #ifdef LARGE_GENOMES | |
9827 | donor_ambcoords = Uint8list_push(donor_ambcoords,Substring_splicecoord(donor)); | |
9828 | #else | |
9829 | donor_ambcoords = Uintlist_push(donor_ambcoords,Substring_splicecoord(donor)); | |
9830 | #endif | |
9831 | donor_amb_knowni = Intlist_push(donor_amb_knowni,-1); | |
9832 | donor_amb_nmismatches = Intlist_push(donor_amb_nmismatches,Substring_nmismatches_whole(donor)); | |
9833 | donor_amb_probs = Doublelist_push(donor_amb_probs,Substring_chimera_prob(donor)); | |
9834 | } | |
9835 | } | |
9836 | ||
9837 | qsort(&(hitarray[i]),j-i,sizeof(Stage3end_T),Stage3end_shortexon_substringA_cmp); | |
9838 | acceptor = Stage3end_substringA(hitarray[i]); | |
9839 | #ifdef LARGE_GENOMES | |
9840 | acceptor_ambcoords = Uint8list_push(acceptor_ambcoords,Substring_splicecoord(acceptor)); | |
9841 | #else | |
9842 | acceptor_ambcoords = Uintlist_push(acceptor_ambcoords,Substring_splicecoord(acceptor)); | |
9843 | #endif | |
9844 | acceptor_amb_knowni = Intlist_push(acceptor_amb_knowni,-1); | |
9845 | acceptor_amb_nmismatches = Intlist_push(acceptor_amb_nmismatches,Substring_nmismatches_whole(acceptor)); | |
9846 | acceptor_amb_probs = Doublelist_push(acceptor_amb_probs,Substring_chimera_prob(acceptor)); | |
9847 | ||
9848 | lastpos = Substring_left_genomicseg(acceptor); | |
9849 | for (k = i + 1; k < j; k++) { | |
9850 | acceptor = Stage3end_substringA(hitarray[k]); | |
9851 | if (Substring_left_genomicseg(acceptor) != lastpos) { | |
9852 | #ifdef LARGE_GENOMES | |
9853 | acceptor_ambcoords = Uint8list_push(acceptor_ambcoords,Substring_splicecoord(acceptor)); | |
9854 | #else | |
9855 | acceptor_ambcoords = Uintlist_push(acceptor_ambcoords,Substring_splicecoord(acceptor)); | |
9856 | #endif | |
9857 | acceptor_amb_knowni = Intlist_push(acceptor_amb_knowni,-1); | |
9858 | acceptor_amb_nmismatches = Intlist_push(acceptor_amb_nmismatches,Substring_nmismatches_whole(acceptor)); | |
9859 | acceptor_amb_probs = Doublelist_push(acceptor_amb_probs,Substring_chimera_prob(acceptor)); | |
9860 | } | |
9861 | } | |
9862 | ||
9863 | shortexon = Stage3end_substringS(hitarray[i]); | |
9864 | sensedir = Stage3end_sensedir(hitarray[i]); | |
9865 | if (Intlist_length(donor_amb_nmismatches) > 1 && Intlist_length(acceptor_amb_nmismatches) > 1) { | |
9866 | hits = List_push(hits,(void *) Stage3end_new_shortexon(&(*found_score),/*donor*/NULL,/*acceptor*/NULL,shortexon, | |
9867 | /*donor_prob*/Doublelist_max(donor_amb_probs),Substring_siteA_prob(shortexon), | |
9868 | Substring_siteD_prob(shortexon),/*acceptor_prob*/Doublelist_max(acceptor_amb_probs), | |
9869 | /*amb_length_donor*/donor_length,/*amb_length_acceptor*/acceptor_length, | |
9870 | donor_ambcoords,acceptor_ambcoords, | |
9871 | donor_amb_knowni,acceptor_amb_knowni, | |
9872 | donor_amb_nmismatches,acceptor_amb_nmismatches, | |
9873 | donor_amb_probs,acceptor_amb_probs, | |
9874 | /*copy_donor_p*/false,/*copy_acceptor_p*/false,/*copy_shortexon_p*/true, | |
9875 | splicing_penalty,querylength,sensedir,/*sarrayp*/false)); | |
9876 | ||
9877 | } else if (Intlist_length(donor_amb_nmismatches) > 1) { | |
9878 | hits = List_push(hits,(void *) Stage3end_new_shortexon(&(*found_score),/*donor*/NULL,acceptor,shortexon, | |
9879 | /*donor_prob*/Doublelist_max(donor_amb_probs),Substring_siteA_prob(shortexon), | |
9880 | Substring_siteD_prob(shortexon),/*acceptor_prob*/Substring_chimera_prob(acceptor), | |
9881 | /*amb_length_donor*/donor_length,/*amb_length_acceptor*/0, | |
9882 | donor_ambcoords,/*acceptor_ambcoords*/NULL, | |
9883 | donor_amb_knowni,/*amb_knowni_acceptor*/NULL, | |
9884 | donor_amb_nmismatches,/*amb_nmismatches_acceptor*/NULL, | |
9885 | donor_amb_probs,/*amb_probs_acceptor*/NULL, | |
9886 | /*copy_donor_p*/false,/*copy_acceptor_p*/true,/*copy_shortexon_p*/true, | |
9887 | splicing_penalty,querylength,sensedir,/*sarrayp*/false)); | |
9888 | ||
9889 | } else if (Intlist_length(acceptor_amb_nmismatches) > 1) { | |
9890 | hits = List_push(hits,(void *) Stage3end_new_shortexon(&(*found_score),donor,/*acceptor*/NULL,shortexon, | |
9891 | /*donor_prob*/Substring_chimera_prob(donor),Substring_siteA_prob(shortexon), | |
9892 | Substring_siteD_prob(shortexon),/*acceptor_prob*/Doublelist_max(acceptor_amb_probs), | |
9893 | /*amb_length_donor*/0,/*amb_length_acceptor*/acceptor_length, | |
9894 | /*ambcoords_donor*/NULL,acceptor_ambcoords, | |
9895 | /*amb_knowni_donor*/NULL,acceptor_amb_knowni, | |
9896 | /*amb_nmismatches_donor*/NULL,acceptor_amb_nmismatches, | |
9897 | /*amb_probs_donor*/NULL,acceptor_amb_probs, | |
9898 | /*copy_donor_p*/true,/*copy_acceptor_p*/false,/*copy_shortexon_p*/true, | |
9899 | splicing_penalty,querylength,sensedir,/*sarrayp*/false)); | |
9900 | ||
9901 | } else { | |
9902 | /* A singleton, apparently due to many duplicates. Is this possible? */ | |
9903 | hits = List_push(hits,(void *) Stage3end_new_shortexon(&(*found_score),donor,acceptor,shortexon, | |
9904 | /*donor_prob*/Substring_chimera_prob(donor),Substring_siteA_prob(shortexon), | |
9905 | Substring_siteD_prob(shortexon),/*acceptor_prob*/Substring_chimera_prob(acceptor), | |
9906 | /*amb_length_donor*/0,/*amb_length_acceptor*/0, | |
9907 | /*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL, | |
9908 | /*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL, | |
9909 | /*amb_nmismatches_donor*/NULL,/*amb_nmismatches_acceptor*/NULL, | |
9910 | /*amb_probs_donor*/NULL,/*amb_probs_acceptor*/NULL, | |
9911 | /*copy_donor_p*/true,/*copy_acceptor_p*/true,/*copy_shortexon_p*/true, | |
9912 | splicing_penalty,querylength,sensedir,/*sarrayp*/false)); | |
9913 | ||
9914 | } | |
9915 | ||
9916 | Doublelist_free(&donor_amb_probs); | |
9917 | Intlist_free(&donor_amb_nmismatches); | |
9918 | Intlist_free(&donor_amb_knowni); | |
9919 | Doublelist_free(&acceptor_amb_probs); | |
9920 | Intlist_free(&acceptor_amb_nmismatches); | |
9921 | Intlist_free(&acceptor_amb_knowni); | |
9922 | #ifdef LARGE_GENOMES | |
9923 | Uint8list_free(&donor_ambcoords); | |
9924 | Uint8list_free(&acceptor_ambcoords); | |
9925 | #else | |
9926 | Uintlist_free(&donor_ambcoords); | |
9927 | Uintlist_free(&acceptor_ambcoords); | |
9928 | #endif | |
9929 | for (k = i; k < j; k++) { | |
9930 | hit = hitarray[k]; | |
9931 | Stage3end_free(&hit); | |
9932 | } | |
9933 | } | |
9934 | ||
9935 | i = j; | |
9936 | } | |
9937 | FREE(hitarray); | |
9938 | ||
9939 | /* Process single ambiguous on donor side */ | |
9940 | hitarray = (Stage3end_T *) List_to_array_n(&n,single_ambig_hits); | |
9941 | qsort(hitarray,n,sizeof(Stage3end_T),substringD_match_length_cmp); | |
9942 | List_free(&single_ambig_hits); | |
9943 | single_ambig_hits = (List_T) NULL; | |
9944 | ||
9945 | i = 0; | |
9946 | while (i < n) { | |
9947 | hit = hitarray[i]; | |
9948 | donor = Stage3end_substringD(hit); | |
9949 | donor_length = Substring_match_length_orig(donor); | |
9950 | j = i + 1; | |
9951 | while (j < n && Substring_match_length_orig(Stage3end_substringD(hitarray[j])) == donor_length) { | |
9952 | j++; | |
9953 | } | |
9954 | if (j == i + 1) { | |
9955 | /* Save for later analysis */ | |
9956 | single_ambig_hits = List_push(single_ambig_hits,(void *) hit); | |
9957 | } else { | |
9958 | acceptor_ambcoords = NULL; | |
9959 | acceptor_amb_knowni = (Intlist_T) NULL; | |
9960 | acceptor_amb_nmismatches = (Intlist_T) NULL; | |
9961 | acceptor_amb_probs = (Doublelist_T) NULL; | |
9962 | ||
9963 | for (k = i + 1; k < j; k++) { | |
9964 | acceptor = Stage3end_substringA(hitarray[i]); | |
9965 | #ifdef LARGE_GENOMES | |
9966 | acceptor_ambcoords = Uint8list_push(acceptor_ambcoords,Substring_splicecoord(acceptor)); | |
9967 | #else | |
9968 | acceptor_ambcoords = Uintlist_push(acceptor_ambcoords,Substring_splicecoord(acceptor)); | |
9969 | #endif | |
9970 | acceptor_amb_knowni = Intlist_push(acceptor_amb_knowni,-1); | |
9971 | acceptor_amb_nmismatches = Intlist_push(acceptor_amb_nmismatches,Substring_nmismatches_whole(acceptor)); | |
9972 | acceptor_amb_probs = Doublelist_push(acceptor_amb_probs,Substring_chimera_prob(acceptor)); | |
9973 | } | |
9974 | ||
9975 | shortexon = Stage3end_substringS(hitarray[i]); | |
9976 | sensedir = Stage3end_sensedir(hitarray[i]); | |
9977 | hits = List_push(hits,(void *) Stage3end_new_shortexon(&(*found_score),donor,/*acceptor*/NULL,shortexon, | |
9978 | /*donor_prob*/Substring_chimera_prob(donor),Substring_siteA_prob(shortexon), | |
9979 | Substring_siteD_prob(shortexon),/*acceptor_prob*/Doublelist_max(acceptor_amb_probs), | |
9980 | /*amb_length_donor*/0,/*amb_length_acceptor*/Substring_match_length_orig(acceptor), | |
9981 | /*ambcoords_donor*/NULL,acceptor_ambcoords, | |
9982 | /*amb_knowni_donor*/NULL,acceptor_amb_knowni, | |
9983 | /*amb_nmismatches_donor*/NULL,acceptor_amb_nmismatches, | |
9984 | /*amb_probs_donor*/NULL,acceptor_amb_probs, | |
9985 | /*copy_donor_p*/true,/*copy_acceptor_p*/false,/*copy_shortexon_p*/true, | |
9986 | splicing_penalty,querylength,sensedir,/*sarrayp*/false)); | |
9987 | Doublelist_free(&acceptor_amb_probs); | |
9988 | Intlist_free(&acceptor_amb_nmismatches); | |
9989 | Intlist_free(&acceptor_amb_knowni); | |
9990 | #ifdef LARGE_GENOMES | |
9991 | Uint8list_free(&acceptor_ambcoords); | |
9992 | #else | |
9993 | Uintlist_free(&acceptor_ambcoords); | |
9994 | #endif | |
9995 | for (k = i; k < j; k++) { | |
9996 | hit = hitarray[k]; | |
9997 | Stage3end_free(&hit); | |
9998 | } | |
9999 | } | |
10000 | ||
10001 | i = j; | |
10002 | } | |
10003 | FREE(hitarray); | |
10004 | ||
10005 | /* Process single ambiguous on acceptor side */ | |
10006 | hitarray = (Stage3end_T *) List_to_array_n(&n,single_ambig_hits); | |
10007 | qsort(hitarray,n,sizeof(Stage3end_T),substringA_match_length_cmp); | |
10008 | List_free(&single_ambig_hits); | |
10009 | ||
10010 | i = 0; | |
10011 | while (i < n) { | |
10012 | hit = hitarray[i]; | |
10013 | acceptor = Stage3end_substringA(hit); | |
10014 | acceptor_length = Substring_match_length_orig(acceptor); | |
10015 | j = i + 1; | |
10016 | while (j < n && Substring_match_length_orig(Stage3end_substringA(hitarray[j])) == acceptor_length) { | |
10017 | j++; | |
10018 | } | |
10019 | if (j == i + 1) { | |
10020 | /* Finally, a confirmed unique */ | |
10021 | hits = List_push(hits,(void *) hit); | |
10022 | } else { | |
10023 | donor_ambcoords = NULL; | |
10024 | donor_amb_knowni = (Intlist_T) NULL; | |
10025 | donor_amb_nmismatches = (Intlist_T) NULL; | |
10026 | donor_amb_probs = (Doublelist_T) NULL; | |
10027 | ||
10028 | for (k = i + 1; k < j; k++) { | |
10029 | donor = Stage3end_substringD(hitarray[i]); | |
10030 | #ifdef LARGE_GENOMES | |
10031 | donor_ambcoords = Uint8list_push(donor_ambcoords,Substring_splicecoord(donor)); | |
10032 | #else | |
10033 | donor_ambcoords = Uintlist_push(donor_ambcoords,Substring_splicecoord(donor)); | |
10034 | #endif | |
10035 | donor_amb_knowni = Intlist_push(donor_amb_knowni,-1); | |
10036 | donor_amb_nmismatches = Intlist_push(donor_amb_nmismatches,Substring_nmismatches_whole(donor)); | |
10037 | donor_amb_probs = Doublelist_push(donor_amb_probs,Substring_chimera_prob(donor)); | |
10038 | } | |
10039 | ||
10040 | shortexon = Stage3end_substringS(hitarray[i]); | |
10041 | sensedir = Stage3end_sensedir(hitarray[i]); | |
10042 | hits = List_push(hits,(void *) Stage3end_new_shortexon(&(*found_score),/*donor*/NULL,acceptor,shortexon, | |
10043 | /*donor_prob*/Doublelist_max(donor_amb_probs),Substring_siteA_prob(shortexon), | |
10044 | Substring_siteD_prob(shortexon),/*acceptor_prob*/Substring_chimera_prob(acceptor), | |
10045 | /*amb_length_donor*/Substring_match_length_orig(donor),/*amb_length_acceptor*/0, | |
10046 | donor_ambcoords,/*acceptor_ambcoords*/NULL, | |
10047 | donor_amb_knowni,/*amb_knowni_acceptor*/NULL, | |
10048 | donor_amb_nmismatches,/*amb_nmismatches_acceptor*/NULL, | |
10049 | donor_amb_probs,/*amb_probs_acceptor*/NULL, | |
10050 | /*copy_donor_p*/false,/*copy_acceptor_p*/true,/*copy_shortexon_p*/true, | |
10051 | splicing_penalty,querylength,sensedir,/*sarrayp*/false)); | |
10052 | Doublelist_free(&donor_amb_probs); | |
10053 | Intlist_free(&donor_amb_nmismatches); | |
10054 | Intlist_free(&donor_amb_knowni); | |
10055 | #ifdef LARGE_GENOMES | |
10056 | Uint8list_free(&donor_ambcoords); | |
10057 | #else | |
10058 | Uintlist_free(&donor_ambcoords); | |
10059 | #endif | |
10060 | for (k = i; k < j; k++) { | |
10061 | hit = hitarray[k]; | |
10062 | Stage3end_free(&hit); | |
10063 | } | |
10064 | } | |
10065 | ||
10066 | i = j; | |
10067 | } | |
10068 | FREE(hitarray); | |
10069 | } | |
10070 | } | |
10071 | } | |
10072 | ||
10073 | ||
10074 | /* Short exon using known splicing, originally on plus strand */ | |
10075 | if (shortexon_orig_plusp == true) { | |
10076 | debug4k(printf("Short exon candidate, orig_plusp. Saw short exon acceptor...donor on segment i\n")); | |
10077 | sensedir = (plusp == true) ? SENSE_FORWARD : SENSE_ANTI; | |
10078 | assert(plusp == true); | |
10079 | assert(sensedir == SENSE_FORWARD); | |
10080 | ||
10081 | for (j1 = joffset; j1 < j; j1++) { | |
10082 | if (splicetypes[j1] == ACCEPTOR) { | |
10083 | leftpos = splicesites[j1] - segmentm_left; | |
10084 | debug4k(printf(" Doing Splicetrie_find_left from leftpos %d (plus)\n",leftpos)); | |
10085 | if ((splicesites_i_left = | |
10086 | Splicetrie_find_left(&nmismatches_shortexon_left,&nmismatches_list_left,j1, | |
10087 | /*origleft*/segmentm_left,/*pos5*/0,/*pos3*/leftpos,segmentm->chroffset, | |
10088 | query_compress,queryptr,querylength,max_mismatches_allowed,/*plusp*/true, | |
10089 | genestrand,first_read_p, | |
10090 | /*collect_all_p*/pairedp == true && first_read_p != plusp)) != NULL) { | |
10091 | ambp_left = (leftpos < min_shortend || Intlist_length(splicesites_i_left) > 1) ? true : false; | |
10092 | ||
10093 | for (j2 = j1 + 1; j2 < j; j2++) { | |
10094 | if (splicetypes[j2] == DONOR && splicesites[j2] > splicesites[j1]) { | |
10095 | rightpos = splicesites[j2] - segmentm_left; | |
10096 | debug4k(printf(" Doing Splicetrie_find_right from rightpos %d (plus)\n",rightpos)); | |
10097 | if ((nmismatches_shortexon_middle = | |
10098 | Genome_count_mismatches_substring(query_compress,segmentm_left,/*pos5*/leftpos,/*pos3*/rightpos, | |
10099 | plusp,genestrand)) <= max_mismatches_allowed - nmismatches_shortexon_left && | |
10100 | (splicesites_i_right = | |
10101 | Splicetrie_find_right(&nmismatches_shortexon_right,&nmismatches_list_right,j2, | |
10102 | /*origleft*/segmentm_left,/*pos5*/rightpos,/*pos3*/querylength,segmentm->chrhigh, | |
10103 | query_compress,queryptr, | |
10104 | max_mismatches_allowed - nmismatches_shortexon_left - nmismatches_shortexon_middle, | |
10105 | /*plusp*/true,genestrand,first_read_p, | |
10106 | /*collect_all_p*/pairedp == true && first_read_p == plusp)) != NULL) { | |
10107 | ambp_right = (querylength - rightpos < min_shortend || Intlist_length(splicesites_i_right) > 1) ? true : false; | |
10108 | ||
10109 | debug4k(printf(" donor %s ... acceptor %d (%llu) ... donor %d (%llu) ... acceptor %s: %d + %d + %d mismatches\n", | |
10110 | Intlist_to_string(splicesites_i_left),j1,(unsigned long long) splicesites[j1], | |
10111 | j2,(unsigned long long) splicesites[j2],Intlist_to_string(splicesites_i_right), | |
10112 | nmismatches_shortexon_left,nmismatches_shortexon_middle,nmismatches_shortexon_right)); | |
10113 | ||
10114 | if (ambp_left == true && ambp_right == true) { | |
10115 | shortexon = Substring_new_shortexon(/*acceptor_coord*/splicesites[j1],/*acceptor_knowni*/j1, | |
10116 | /*donor_coord*/splicesites[j2],/*donor_knowni*/j2, | |
10117 | /*acceptor_pos*/leftpos,/*donor_pos*/rightpos, | |
10118 | nmismatches_shortexon_middle, | |
10119 | /*acceptor_prob*/2.0,/*donor_prob*/2.0, | |
10120 | /*left*/segmentm_left,query_compress, | |
10121 | querylength,/*plusp*/true,genestrand, | |
10122 | sensedir,/*acceptor_ambp*/true,/*donor_ambp*/true, | |
10123 | segmentm->chrnum,segmentm->chroffset,segmentm->chrhigh,segmentm->chrlength); | |
10124 | if (shortexon != NULL) { | |
10125 | debug4k(printf("New one-third shortexon at left %llu\n",(unsigned long long) segmentm_left)); | |
10126 | ambcoords_donor = lookup_splicesites(&probs_donor,splicesites_i_left,splicesites); | |
10127 | ambcoords_acceptor = lookup_splicesites(&probs_acceptor,splicesites_i_right,splicesites); | |
10128 | amb_length_donor = leftpos /*- nmismatches_shortexon_left*/; | |
10129 | amb_length_acceptor = querylength - rightpos /*- nmismatches_shortexon_right*/; | |
10130 | segmentm->usedp = true; | |
10131 | hits = List_push(hits,(void *) Stage3end_new_shortexon(&(*found_score),/*donor*/NULL,/*acceptor*/NULL,shortexon, | |
10132 | Doublelist_max(probs_donor),Substring_siteA_prob(shortexon), | |
10133 | Substring_siteD_prob(shortexon),Doublelist_max(probs_acceptor), | |
10134 | amb_length_donor,amb_length_acceptor, | |
10135 | ambcoords_donor,ambcoords_acceptor, | |
10136 | /*amb_knowni_donor*/splicesites_i_left,/*amb_knowni_acceptor*/splicesites_i_right, | |
10137 | /*amb_nmismatches_donor*/nmismatches_list_left,/*amb_nmismatches_acceptor*/nmismatches_list_right, | |
10138 | /*amb_probs_donor*/probs_donor,/*amb_nmismatches_acceptor*/probs_acceptor, | |
10139 | /*copy_donor_p*/false,/*copy_acceptor_p*/false,/*copy_shortexon_p*/false, | |
10140 | splicing_penalty,querylength,sensedir,/*sarrayp*/false)); | |
10141 | Doublelist_free(&probs_donor); | |
10142 | Doublelist_free(&probs_acceptor); | |
10143 | #ifdef LARGE_GENOMES | |
10144 | Uint8list_free(&ambcoords_donor); | |
10145 | Uint8list_free(&ambcoords_acceptor); | |
10146 | #else | |
10147 | Uintlist_free(&ambcoords_donor); | |
10148 | Uintlist_free(&ambcoords_acceptor); | |
10149 | #endif | |
10150 | } | |
10151 | ||
10152 | } else if (ambp_left == true && ambp_right == false) { | |
10153 | debug4k(printf("ambp_left true, ambp_right false\n")); | |
10154 | best_right_j = Intlist_head(splicesites_i_right); | |
10155 | ||
10156 | debug4k(printf("shortexon with amb_acceptor at %d (%llu) ... donor at %d (%llu)\n", | |
10157 | j1,(unsigned long long) splicesites[j1],j2,(unsigned long long) splicesites[j2])); | |
10158 | shortexon = Substring_new_shortexon(/*acceptor_coord*/splicesites[j1],/*acceptor_knowni*/j1, | |
10159 | /*donor_coord*/splicesites[j2],/*donor_knowni*/j2, | |
10160 | /*acceptor_pos*/leftpos,/*donor_pos*/rightpos, | |
10161 | nmismatches_shortexon_middle, | |
10162 | /*acceptor_prob*/2.0,/*donor_prob*/2.0, | |
10163 | /*left*/segmentm_left,query_compress, | |
10164 | querylength,/*plusp*/true,genestrand, | |
10165 | sensedir,/*acceptor_ambp*/true,/*donor_ambp*/false, | |
10166 | segmentm->chrnum,segmentm->chroffset,segmentm->chrhigh,segmentm->chrlength); | |
10167 | ||
10168 | debug4k(printf("acceptor at %d (%llu)\n",best_right_j,(unsigned long long) splicesites[best_right_j])); | |
10169 | acceptor = Substring_new_acceptor(/*acceptor_coord*/splicesites[best_right_j],/*acceptor_knowni*/best_right_j, | |
10170 | /*splice_pos*/rightpos,/*substring_querystart*/0,/*substring_queryend*/querylength, | |
10171 | nmismatches_shortexon_right, | |
10172 | /*prob*/2.0,/*left*/splicesites[best_right_j]-rightpos, | |
10173 | query_compress,querylength,/*plusp*/true,genestrand, | |
10174 | /*sensedir*/SENSE_FORWARD,segmentm->chrnum, | |
10175 | segmentm->chroffset,segmentm->chrhigh,segmentm->chrlength); | |
10176 | ||
10177 | if (shortexon == NULL || acceptor == NULL) { | |
10178 | if (shortexon != NULL) Substring_free(&shortexon); | |
10179 | if (acceptor != NULL) Substring_free(&acceptor); | |
10180 | } else { | |
10181 | debug4k(printf("ambp_left true, ambp_right false: New two-thirds shortexon at left %llu\n", | |
10182 | (unsigned long long) segmentm_left)); | |
10183 | ambcoords_donor = lookup_splicesites(&probs_donor,splicesites_i_left,splicesites); | |
10184 | amb_length_donor = leftpos /*- nmismatches_shortexon_left*/; | |
10185 | segmentm->usedp = true; | |
10186 | hits = List_push(hits,(void *) Stage3end_new_shortexon(&(*found_score),/*donor*/NULL,acceptor,shortexon, | |
10187 | Doublelist_max(probs_donor),Substring_siteA_prob(shortexon), | |
10188 | Substring_siteD_prob(shortexon),Substring_chimera_prob(acceptor), | |
10189 | amb_length_donor,/*amb_length_acceptor*/0, | |
10190 | ambcoords_donor,/*ambcoords_acceptor*/NULL, | |
10191 | /*amb_knowni_donor*/splicesites_i_left,/*amb_knowni_acceptor*/NULL, | |
10192 | /*amb_nmismatches_donor*/nmismatches_list_left,/*amb_nmismatches_acceptor*/NULL, | |
10193 | /*amb_probs_donor*/probs_donor,/*amb_probs_acceptor*/NULL, | |
10194 | /*copy_donor_p*/false,/*copy_acceptor_p*/false,/*copy_shortexon_p*/false, | |
10195 | splicing_penalty,querylength,sensedir,/*sarrayp*/false)); | |
10196 | Doublelist_free(&probs_donor); | |
10197 | #ifdef LARGE_GENOMES | |
10198 | Uint8list_free(&ambcoords_donor); | |
10199 | #else | |
10200 | Uintlist_free(&ambcoords_donor); | |
10201 | #endif | |
10202 | } | |
10203 | ||
10204 | } else if (ambp_left == false && ambp_right == true) { | |
10205 | debug4k(printf("ambp_left false, ambp_right true\n")); | |
10206 | best_left_j = Intlist_head(splicesites_i_left); | |
10207 | ||
10208 | debug4k(printf("donor at %d (%llu)\n",best_left_j,(unsigned long long) splicesites[best_left_j])); | |
10209 | donor = Substring_new_donor(/*donor_coord*/splicesites[best_left_j],/*donor_knowni*/best_left_j, | |
10210 | /*splice_pos*/leftpos,/*substring_querystart*/0,/*substring_queryend*/querylength, | |
10211 | nmismatches_shortexon_left, | |
10212 | /*prob*/2.0,/*left*/splicesites[best_left_j]-leftpos, | |
10213 | query_compress,querylength,/*plusp*/true,genestrand, | |
10214 | /*sensedir*/SENSE_FORWARD,segmentm->chrnum, | |
10215 | segmentm->chroffset,segmentm->chrhigh,segmentm->chrlength); | |
10216 | ||
10217 | debug4k(printf("shortexon with acceptor at %d (%llu) ... amb_donor %d (%llu)\n", | |
10218 | j1,(unsigned long long) splicesites[j1],j2,(unsigned long long) splicesites[j2])); | |
10219 | shortexon = Substring_new_shortexon(/*acceptor_coord*/splicesites[j1],/*acceptor_knowni*/j1, | |
10220 | /*donor_coord*/splicesites[j2],/*donor_knowni*/j2, | |
10221 | /*acceptor_pos*/leftpos,/*donor_pos*/rightpos, | |
10222 | nmismatches_shortexon_middle, | |
10223 | /*acceptor_prob*/2.0,/*donor_prob*/2.0, | |
10224 | /*left*/segmentm_left,query_compress, | |
10225 | querylength,/*plusp*/true,genestrand, | |
10226 | /*sensedir*/SENSE_FORWARD,/*acceptor_ambp*/false,/*donor_ambp*/true, | |
10227 | segmentm->chrnum,segmentm->chroffset,segmentm->chrhigh,segmentm->chrlength); | |
10228 | ||
10229 | if (donor == NULL || shortexon == NULL) { | |
10230 | if (donor != NULL) Substring_free(&donor); | |
10231 | if (shortexon != NULL) Substring_free(&shortexon); | |
10232 | } else { | |
10233 | ambcoords_acceptor = lookup_splicesites(&probs_acceptor,splicesites_i_right,splicesites); | |
10234 | amb_length_acceptor = querylength - rightpos /*- nmismatches_shortexon_right*/; | |
10235 | segmentm->usedp = true; | |
10236 | hits = List_push(hits,(void *) Stage3end_new_shortexon(&(*found_score),donor,/*acceptor*/NULL,shortexon, | |
10237 | Substring_chimera_prob(donor),Substring_siteA_prob(shortexon), | |
10238 | Substring_siteD_prob(shortexon),Doublelist_max(probs_acceptor), | |
10239 | /*amb_length_donor*/0,amb_length_acceptor, | |
10240 | /*ambcoords_donor*/NULL,ambcoords_acceptor, | |
10241 | /*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/splicesites_i_right, | |
10242 | /*amb_nmismatches_donor*/NULL,/*amb_nmismatches_acceptor*/nmismatches_list_right, | |
10243 | /*amb_probs_donor*/NULL,/*amb_probs_acceptor*/probs_acceptor, | |
10244 | /*copy_donor_p*/false,/*copy_acceptor_p*/false,/*copy_shortexon_p*/false, | |
10245 | splicing_penalty,querylength,sensedir,/*sarrayp*/false)); | |
10246 | Doublelist_free(&probs_acceptor); | |
10247 | #ifdef LARGE_GENOMES | |
10248 | Uint8list_free(&ambcoords_acceptor); | |
10249 | #else | |
10250 | Uintlist_free(&ambcoords_acceptor); | |
10251 | #endif | |
10252 | } | |
10253 | ||
10254 | ||
10255 | } else { /* ambp_left == false && ambp_right == false */ | |
10256 | debug4k(printf("ambp_left false, ambp_right false\n")); | |
10257 | best_left_j = Intlist_head(splicesites_i_left); | |
10258 | best_right_j = Intlist_head(splicesites_i_right); | |
10259 | donor = Substring_new_donor(/*donor_coord*/splicesites[best_left_j],/*donor_knowni*/best_left_j, | |
10260 | /*splice_pos*/leftpos,/*substring_querystart*/0,/*substring_queryend*/querylength, | |
10261 | nmismatches_shortexon_left, | |
10262 | /*prob*/2.0,/*left*/splicesites[best_left_j]-leftpos, | |
10263 | query_compress,querylength,/*plusp*/true,genestrand, | |
10264 | /*sensedir*/SENSE_FORWARD,segmentm->chrnum, | |
10265 | segmentm->chroffset,segmentm->chrhigh,segmentm->chrlength); | |
10266 | ||
10267 | shortexon = Substring_new_shortexon(/*acceptor_coord*/splicesites[j1],/*acceptor_knowni*/j1, | |
10268 | /*donor_coord*/splicesites[j2],/*donor_knowni*/j2, | |
10269 | /*acceptor_pos*/leftpos,/*donor_pos*/rightpos, | |
10270 | nmismatches_shortexon_middle,/*acceptor_prob*/2.0,/*donor_prob*/2.0, | |
10271 | /*left*/segmentm_left,query_compress, | |
10272 | querylength,/*plusp*/true,genestrand, | |
10273 | sensedir,/*acceptor_ambp*/false,/*donor_ambp*/false, | |
10274 | segmentm->chrnum,segmentm->chroffset,segmentm->chrhigh,segmentm->chrlength); | |
10275 | ||
10276 | acceptor = Substring_new_acceptor(/*acceptor_coord*/splicesites[best_right_j],/*acceptor_knowni*/best_right_j, | |
10277 | /*splice_pos*/rightpos,/*substring_querystart*/0,/*substring_queryend*/querylength, | |
10278 | nmismatches_shortexon_right, | |
10279 | /*prob*/2.0,/*left*/splicesites[best_right_j]-rightpos, | |
10280 | query_compress,querylength,/*plusp*/true,genestrand, | |
10281 | /*sensedir*/SENSE_FORWARD,segmentm->chrnum, | |
10282 | segmentm->chroffset,segmentm->chrhigh,segmentm->chrlength); | |
10283 | ||
10284 | if (donor == NULL || shortexon == NULL || acceptor == NULL) { | |
10285 | if (donor != NULL) Substring_free(&donor); | |
10286 | if (shortexon != NULL) Substring_free(&shortexon); | |
10287 | if (acceptor != NULL) Substring_free(&acceptor); | |
10288 | } else { | |
10289 | debug4k(printf("New shortexon at left %llu\n",(unsigned long long) segmentm_left)); | |
10290 | segmentm->usedp = true; | |
10291 | hits = List_push(hits,(void *) Stage3end_new_shortexon(&(*found_score),donor,acceptor,shortexon, | |
10292 | Substring_chimera_prob(donor),Substring_siteA_prob(shortexon), | |
10293 | Substring_siteD_prob(shortexon),Substring_chimera_prob(acceptor), | |
10294 | /*amb_length_donor*/0,/*amb_length_acceptor*/0, | |
10295 | /*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL, | |
10296 | /*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL, | |
10297 | /*amb_nmismatches_donor*/NULL,/*amb_nmismatches_acceptor*/NULL, | |
10298 | /*amb_probs_donor*/NULL,/*amb_probs_acceptor*/NULL, | |
10299 | /*copy_donor_p*/false,/*copy_acceptor_p*/false,/*copy_shortexon_p*/false, | |
10300 | splicing_penalty,querylength,sensedir,/*sarrayp*/false)); | |
10301 | } | |
10302 | } | |
10303 | Intlist_free(&nmismatches_list_right); | |
10304 | Intlist_free(&splicesites_i_right); | |
10305 | } | |
10306 | } | |
10307 | } | |
10308 | Intlist_free(&nmismatches_list_left); | |
10309 | Intlist_free(&splicesites_i_left); | |
10310 | } | |
10311 | } | |
10312 | } | |
10313 | debug4k(printf("End of case 1\n")); | |
10314 | } | |
10315 | ||
10316 | /* Short exon using known splicing, originally on minus strand */ | |
10317 | if (shortexon_orig_minusp == true) { | |
10318 | debug4k(printf("Short exon candidate, orig_minusp. Saw short exon antidonor...antiacceptor on segment i\n")); | |
10319 | sensedir = (plusp == true) ? SENSE_ANTI : SENSE_FORWARD; | |
10320 | assert(plusp == false); | |
10321 | assert(sensedir == SENSE_ANTI); | |
10322 | ||
10323 | for (j1 = joffset; j1 < j; j1++) { | |
10324 | if (splicetypes[j1] == ANTIDONOR) { | |
10325 | leftpos = splicesites[j1] - segmentm_left; | |
10326 | debug4k(printf(" Doing Splicetrie_find_left from leftpos %d (minus)\n",leftpos)); | |
10327 | if ((splicesites_i_left = | |
10328 | Splicetrie_find_left(&nmismatches_shortexon_left,&nmismatches_list_left,j1, | |
10329 | /*origleft*/segmentm_left,/*pos5*/0,/*pos3*/leftpos,segmentm->chroffset, | |
10330 | query_compress,queryptr,querylength,max_mismatches_allowed, | |
10331 | /*plusp*/false,genestrand,first_read_p, | |
10332 | /*collect_all_p*/pairedp == true && first_read_p != plusp)) != NULL) { | |
10333 | ambp_left = (leftpos < min_shortend || Intlist_length(splicesites_i_left) > 1) ? true : false; | |
10334 | ||
10335 | for (j2 = j1 + 1; j2 < j; j2++) { | |
10336 | if (splicetypes[j2] == ANTIACCEPTOR && splicesites[j2] > splicesites[j1]) { | |
10337 | rightpos = splicesites[j2] - segmentm_left; | |
10338 | debug4k(printf(" Doing Splicetrie_find_right from rightpos %d (minus)\n",rightpos)); | |
10339 | if ((nmismatches_shortexon_middle = | |
10340 | Genome_count_mismatches_substring(query_compress,segmentm_left,/*pos5*/leftpos,/*pos3*/rightpos, | |
10341 | /*plusp*/false,genestrand)) <= max_mismatches_allowed - nmismatches_shortexon_left && | |
10342 | (splicesites_i_right = | |
10343 | Splicetrie_find_right(&nmismatches_shortexon_right,&nmismatches_list_right,j2, | |
10344 | /*origleft*/segmentm_left,/*pos5*/rightpos,/*pos3*/querylength,segmentm->chrhigh, | |
10345 | query_compress,queryptr, | |
10346 | max_mismatches_allowed - nmismatches_shortexon_left - nmismatches_shortexon_middle, | |
10347 | /*plusp*/false,genestrand,first_read_p, | |
10348 | /*collect_all_p*/pairedp == true && first_read_p == plusp)) != NULL) { | |
10349 | ambp_right = (querylength - rightpos < min_shortend || Intlist_length(splicesites_i_right) > 1) ? true : false; | |
10350 | ||
10351 | debug4k(printf(" antiacceptor %s ... antidonor %d (%llu) ... antiacceptor %d (%llu) ... antidonor %s: %d + %d + %d mismatches\n", | |
10352 | Intlist_to_string(splicesites_i_left),j1,(unsigned long long) splicesites[j1], | |
10353 | j2,(unsigned long long) splicesites[j2],Intlist_to_string(splicesites_i_right), | |
10354 | nmismatches_shortexon_left,nmismatches_shortexon_middle,nmismatches_shortexon_right)); | |
10355 | ||
10356 | if (ambp_left == true && ambp_right == true) { | |
10357 | shortexon = Substring_new_shortexon(/*acceptor_coord*/splicesites[j2],/*acceptor_knowni*/j2, | |
10358 | /*donor_coord*/splicesites[j1],/*donor_knowni*/j1, | |
10359 | /*acceptor_pos*/rightpos,/*donor_pos*/leftpos,nmismatches_shortexon_middle, | |
10360 | /*acceptor_prob*/2.0,/*donor_prob*/2.0, | |
10361 | /*left*/segmentm_left,query_compress, | |
10362 | querylength,/*plusp*/false,genestrand, | |
10363 | sensedir,/*acceptor_ambp*/true,/*donor_ambp*/true, | |
10364 | segmentm->chrnum,segmentm->chroffset,segmentm->chrhigh,segmentm->chrlength); | |
10365 | if (shortexon != NULL) { | |
10366 | debug4k(printf("New one-third shortexon at left %llu\n",(unsigned long long) segmentm_left)); | |
10367 | ambcoords_donor = lookup_splicesites(&probs_donor,splicesites_i_right,splicesites); | |
10368 | ambcoords_acceptor = lookup_splicesites(&probs_acceptor,splicesites_i_left,splicesites); | |
10369 | amb_length_donor = querylength - rightpos /*- nmismatches_shortexon_right*/; | |
10370 | amb_length_acceptor = leftpos /*- nmismatches_shortexon_left*/; | |
10371 | segmentm->usedp = true; | |
10372 | hits = List_push(hits,(void *) Stage3end_new_shortexon(&(*found_score),/*donor*/NULL,/*acceptor*/NULL,shortexon, | |
10373 | Doublelist_max(probs_donor),Substring_siteA_prob(shortexon), | |
10374 | Substring_siteD_prob(shortexon),Doublelist_max(probs_acceptor), | |
10375 | amb_length_donor,amb_length_acceptor, | |
10376 | ambcoords_donor,ambcoords_acceptor, | |
10377 | /*amb_knowni_donor*/splicesites_i_right,/*amb_knowni_acceptor*/splicesites_i_left, | |
10378 | /*amb_nmismatches_donor*/nmismatches_list_right,/*amb_nmismatches_acceptor*/nmismatches_list_left, | |
10379 | /*amb_probs_donor*/probs_donor,/*amb_probs_acceptor*/probs_acceptor, | |
10380 | /*copy_donor_p*/false,/*copy_acceptor_p*/false,/*copy_shortexon_p*/false, | |
10381 | splicing_penalty,querylength,sensedir,/*sarrayp*/false)); | |
10382 | Doublelist_free(&probs_donor); | |
10383 | Doublelist_free(&probs_acceptor); | |
10384 | #ifdef LARGE_GENOMES | |
10385 | Uint8list_free(&ambcoords_donor); | |
10386 | Uint8list_free(&ambcoords_acceptor); | |
10387 | #else | |
10388 | Uintlist_free(&ambcoords_donor); | |
10389 | Uintlist_free(&ambcoords_acceptor); | |
10390 | #endif | |
10391 | } | |
10392 | ||
10393 | } else if (ambp_left == true && ambp_right == false) { | |
10394 | debug4k(printf("ambp_left true, ambp_right false\n")); | |
10395 | best_right_j = Intlist_head(splicesites_i_right); | |
10396 | ||
10397 | debug4k(printf("shortexon with amb_donor at %d (%llu) ... acceptor at %d (%llu)\n", | |
10398 | j1,(unsigned long long) splicesites[j1],j2,(unsigned long long) splicesites[j2])); | |
10399 | shortexon = Substring_new_shortexon(/*acceptor_coord*/splicesites[j2],/*acceptor_knowni*/j2, | |
10400 | /*donor_coord*/splicesites[j1],/*donor_knowni*/j1, | |
10401 | /*acceptor_pos*/rightpos,/*donor_pos*/leftpos,nmismatches_shortexon_middle, | |
10402 | /*acceptor_prob*/2.0,/*donor_prob*/2.0, | |
10403 | /*left*/segmentm_left,query_compress, | |
10404 | querylength,/*plusp*/false,genestrand, | |
10405 | sensedir,/*acceptor_ambp*/false,/*donor_ambp*/true, | |
10406 | segmentm->chrnum,segmentm->chroffset,segmentm->chrhigh,segmentm->chrlength); | |
10407 | ||
10408 | debug4k(printf("donor at %d (%llu)\n",best_right_j,(unsigned long long) splicesites[best_right_j])); | |
10409 | donor = Substring_new_donor(/*donor_coord*/splicesites[best_right_j],/*donor_knowni*/best_right_j, | |
10410 | /*splice_pos*/rightpos,/*substring_querystart*/0,/*substring_queryend*/querylength, | |
10411 | nmismatches_shortexon_right, | |
10412 | /*prob*/2.0,/*left*/splicesites[best_right_j]-rightpos, | |
10413 | query_compress,querylength,/*plusp*/false,genestrand, | |
10414 | /*sensedir*/SENSE_ANTI,segmentm->chrnum, | |
10415 | segmentm->chroffset,segmentm->chrhigh,segmentm->chrlength); | |
10416 | ||
10417 | if (donor == NULL || shortexon == NULL) { | |
10418 | if (donor != NULL) Substring_free(&donor); | |
10419 | if (shortexon != NULL) Substring_free(&shortexon); | |
10420 | } else { | |
10421 | ambcoords_acceptor = lookup_splicesites(&probs_acceptor,splicesites_i_left,splicesites); | |
10422 | amb_length_acceptor = leftpos /*- nmismatches_shortexon_left*/; | |
10423 | segmentm->usedp = true; | |
10424 | hits = List_push(hits,(void *) Stage3end_new_shortexon(&(*found_score),donor,/*acceptor*/NULL,shortexon, | |
10425 | Substring_chimera_prob(donor),Substring_siteA_prob(shortexon), | |
10426 | Substring_siteD_prob(shortexon),Doublelist_max(probs_acceptor), | |
10427 | /*amb_length_donor*/0,amb_length_acceptor, | |
10428 | /*ambcoords_donor*/NULL,ambcoords_acceptor, | |
10429 | /*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/splicesites_i_left, | |
10430 | /*amb_nmismatches_donor*/NULL,/*amb_nmismatches_acceptor*/nmismatches_list_left, | |
10431 | /*amb_probs_donor*/NULL,/*amb_probs_acceptor*/probs_acceptor, | |
10432 | /*copy_donor_p*/false,/*copy_acceptor_p*/false,/*copy_shortexon_p*/false, | |
10433 | splicing_penalty,querylength,sensedir,/*sarrayp*/false)); | |
10434 | Doublelist_free(&probs_acceptor); | |
10435 | #ifdef LARGE_GENOMES | |
10436 | Uint8list_free(&ambcoords_acceptor); | |
10437 | #else | |
10438 | Uintlist_free(&ambcoords_acceptor); | |
10439 | #endif | |
10440 | } | |
10441 | ||
10442 | } else if (ambp_left == false && ambp_right == true) { | |
10443 | debug4k(printf("ambp_left false, ambp_right true\n")); | |
10444 | best_left_j = Intlist_head(splicesites_i_left); | |
10445 | ||
10446 | debug4k(printf("acceptor at %d (%llu)\n",best_left_j,(unsigned long long) splicesites[best_left_j])); | |
10447 | acceptor = Substring_new_acceptor(/*acceptor_coord*/splicesites[best_left_j],/*acceptor_knowni*/best_left_j, | |
10448 | /*splice_pos*/leftpos,/*substring_querystart*/0,/*substring_queryend*/querylength, | |
10449 | nmismatches_shortexon_left, | |
10450 | /*prob*/2.0,/*left*/splicesites[best_left_j]-leftpos, | |
10451 | query_compress,querylength,/*plusp*/false,genestrand, | |
10452 | /*sensedir*/SENSE_ANTI,segmentm->chrnum, | |
10453 | segmentm->chroffset,segmentm->chrhigh,segmentm->chrlength); | |
10454 | ||
10455 | debug4k(printf("shortexon with donor at %d (%llu) ... amb_acceptor at %d (%llu)\n", | |
10456 | j2,(unsigned long long) splicesites[j2],j1,(unsigned long long) splicesites[j1])); | |
10457 | shortexon = Substring_new_shortexon(/*acceptor_coord*/splicesites[j2],/*acceptor_knowni*/j2, | |
10458 | /*donor_coord*/splicesites[j1],/*donor_knowni*/j1, | |
10459 | /*acceptor_pos*/rightpos,/*donor_pos*/leftpos,nmismatches_shortexon_middle, | |
10460 | /*acceptor_prob*/2.0,/*donor_prob*/2.0, | |
10461 | /*left*/segmentm_left,query_compress, | |
10462 | querylength,/*plusp*/false,genestrand, | |
10463 | sensedir,/*acceptor_ambp*/true,/*donor_ambp*/false, | |
10464 | segmentm->chrnum,segmentm->chroffset,segmentm->chrhigh,segmentm->chrlength); | |
10465 | ||
10466 | if (shortexon == NULL || acceptor == NULL) { | |
10467 | if (shortexon != NULL) Substring_free(&shortexon); | |
10468 | if (acceptor != NULL) Substring_free(&acceptor); | |
10469 | } else { | |
10470 | debug4k(printf("ambp_left false, ambp_right true: New splice at left %llu\n", | |
10471 | (unsigned long long) segmentm_left)); | |
10472 | ambcoords_donor = lookup_splicesites(&probs_donor,splicesites_i_right,splicesites); | |
10473 | amb_length_donor = querylength - rightpos /*- nmismatches_shortexon_right*/; | |
10474 | segmentm->usedp = true; | |
10475 | hits = List_push(hits,(void *) Stage3end_new_shortexon(&(*found_score),/*donor*/NULL,acceptor,shortexon, | |
10476 | Doublelist_max(probs_donor),Substring_siteA_prob(shortexon), | |
10477 | Substring_siteD_prob(shortexon),Substring_chimera_prob(acceptor), | |
10478 | amb_length_donor,/*amb_length_acceptor*/0, | |
10479 | ambcoords_donor,/*ambcoords_acceptor*/NULL, | |
10480 | /*amb_knowni_donor*/splicesites_i_right,/*amb_knowni_acceptor*/NULL, | |
10481 | /*amb_nmismatches_donor*/nmismatches_list_right,/*amb_nmismatches_acceptor*/NULL, | |
10482 | /*amb_probs_donor*/probs_donor,/*amb_probs_acceptor*/NULL, | |
10483 | /*copy_donor_p*/false,/*copy_acceptor_p*/false,/*copy_shortexon_p*/false, | |
10484 | splicing_penalty,querylength,sensedir,/*sarrayp*/false)); | |
10485 | Doublelist_free(&probs_donor); | |
10486 | #ifdef LARGE_GENOMES | |
10487 | Uint8list_free(&ambcoords_donor); | |
10488 | #else | |
10489 | Uintlist_free(&ambcoords_donor); | |
10490 | #endif | |
10491 | } | |
10492 | ||
10493 | } else { /* ambp_left == false && ambp_right == false */ | |
10494 | best_left_j = Intlist_head(splicesites_i_left); | |
10495 | best_right_j = Intlist_head(splicesites_i_right); | |
10496 | acceptor = Substring_new_acceptor(/*acceptor_coord*/splicesites[best_left_j],/*acceptor_knowni*/best_left_j, | |
10497 | /*splice_pos*/leftpos,/*substring_querystart*/0,/*substring_queryend*/querylength, | |
10498 | nmismatches_shortexon_left, | |
10499 | /*prob*/2.0,/*left*/splicesites[best_left_j]-leftpos, | |
10500 | query_compress,querylength,/*plusp*/false,genestrand, | |
10501 | /*sensedir*/SENSE_ANTI,segmentm->chrnum, | |
10502 | segmentm->chroffset,segmentm->chrhigh,segmentm->chrlength); | |
10503 | ||
10504 | shortexon = Substring_new_shortexon(/*acceptor_coord*/splicesites[j2],/*acceptor_knowni*/j2, | |
10505 | /*donor_coord*/splicesites[j1],/*donor_knowni*/j1, | |
10506 | /*acceptor_pos*/rightpos,/*donor_pos*/leftpos, | |
10507 | nmismatches_shortexon_middle,/*acceptor_prob*/2.0,/*donor_prob*/2.0, | |
10508 | /*left*/segmentm_left,query_compress, | |
10509 | querylength,/*plusp*/false,genestrand, | |
10510 | sensedir,/*acceptor_ambp*/false,/*donor_ambp*/false, | |
10511 | segmentm->chrnum,segmentm->chroffset,segmentm->chrhigh,segmentm->chrlength); | |
10512 | ||
10513 | donor = Substring_new_donor(/*donor_coord*/splicesites[best_right_j],/*donor_knowni*/best_right_j, | |
10514 | /*splice_pos*/rightpos,/*substring_querystart*/0,/*substring_queryend*/querylength, | |
10515 | nmismatches_shortexon_right, | |
10516 | /*prob*/2.0,/*left*/splicesites[best_right_j]-rightpos, | |
10517 | query_compress,querylength,/*plusp*/false,genestrand, | |
10518 | /*sensedir*/SENSE_ANTI,segmentm->chrnum, | |
10519 | segmentm->chroffset,segmentm->chrhigh,segmentm->chrlength); | |
10520 | ||
10521 | if (acceptor == NULL || shortexon == NULL || donor == NULL) { | |
10522 | if (acceptor != NULL) Substring_free(&acceptor); | |
10523 | if (shortexon != NULL) Substring_free(&shortexon); | |
10524 | if (donor != NULL) Substring_free(&donor); | |
10525 | } else { | |
10526 | debug4k(printf("New shortexon at left %llu\n",(unsigned long long) segmentm_left)); | |
10527 | segmentm->usedp = true; | |
10528 | hits = List_push(hits,(void *) Stage3end_new_shortexon(&(*found_score),donor,acceptor,shortexon, | |
10529 | Substring_chimera_prob(donor),Substring_siteA_prob(shortexon), | |
10530 | Substring_siteD_prob(shortexon),Substring_chimera_prob(acceptor), | |
10531 | /*amb_length_donor*/0,/*amb_length_acceptor*/0, | |
10532 | /*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL, | |
10533 | /*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL, | |
10534 | /*amb_nmismatches_donor*/NULL,/*amb_nmismatches_acceptor*/NULL, | |
10535 | /*amb_probs_donor*/NULL,/*amb_probs_acceptor*/NULL, | |
10536 | /*copy_donor_p*/false,/*copy_acceptor_p*/false,/*copy_shortexon_p*/false, | |
10537 | splicing_penalty,querylength,sensedir,/*sarrayp*/false)); | |
10538 | } | |
10539 | } | |
10540 | Intlist_free(&nmismatches_list_right); | |
10541 | Intlist_free(&splicesites_i_right); | |
10542 | } | |
10543 | } | |
10544 | } | |
10545 | Intlist_free(&nmismatches_list_left); | |
10546 | Intlist_free(&splicesites_i_left); | |
10547 | } | |
10548 | } | |
10549 | } | |
10550 | debug4k(printf("End of case 2\n")); | |
10551 | } | |
10552 | /* End of known splicesites, segment i */ | |
10553 | } | |
10554 | } | |
10555 | ||
10556 | debug4k(printf("Finished find_known_doublesplices with %d hits\n",List_length(hits))); | |
10557 | return hits; | |
10558 | } | |
10559 | #endif | |
10560 | ||
10561 | ||
10562 | 9550 | |
10563 | 9551 | static void |
10564 | 9552 | find_spliceends_shortend (List_T **shortend_donors, List_T **shortend_antidonors, |
10579 | 9567 | int nmismatches, jstart, jend, j; |
10580 | 9568 | int splice_pos; |
10581 | 9569 | |
10582 | #ifdef HAVE_ALLOCA | |
10583 | int *mismatch_positions = (int *) ALLOCA((querylength+1)*sizeof(int)); | |
10584 | #else | |
10585 | int mismatch_positions[MAX_READLENGTH+1]; | |
10586 | #endif | |
10587 | ||
10588 | 9570 | int nmismatches_left, nmismatches_right; |
10589 | 9571 | int *floors_from_neg3, *floors_to_pos3; |
10590 | 9572 | int sensedir; |
10592 | 9574 | int splice_pos_start, splice_pos_end; |
10593 | 9575 | #ifdef DEBUG4E |
10594 | 9576 | int i; |
9577 | #endif | |
9578 | ||
9579 | int *mismatch_positions; | |
9580 | ||
9581 | #ifdef HAVE_ALLOCA | |
9582 | if (querylength <= MAX_STACK_READLENGTH) { | |
9583 | mismatch_positions = (int *) ALLOCA((querylength+1)*sizeof(int)); | |
9584 | } else { | |
9585 | mismatch_positions = (int *) MALLOC((querylength+1)*sizeof(int)); | |
9586 | } | |
9587 | #else | |
9588 | mismatch_positions = (int *) MALLOC((querylength+1)*sizeof(int)); | |
10595 | 9589 | #endif |
10596 | 9590 | |
10597 | 9591 | debug4e(printf("Entering find_spliceends_shortend with %d anchor segments\n",nanchors)); |
10683 | 9677 | sensedir,segment->chrnum,segment->chroffset, |
10684 | 9678 | segment->chrhigh,segment->chrlength)) != NULL) { |
10685 | 9679 | debug4e(printf("=> %s donor: known at %d (%d mismatches)\n", |
10686 | plusp == true ? "plus" : "minus",Substring_chimera_pos(hit),nmismatches)); | |
9680 | plusp == true ? "plus" : "minus",Substring_siteD_pos(hit),nmismatches)); | |
10687 | 9681 | (*shortend_donors)[nmismatches] = List_push((*shortend_donors)[nmismatches],(void *) hit); |
10688 | 9682 | } |
10689 | 9683 | |
10700 | 9694 | sensedir,segment->chrnum,segment->chroffset, |
10701 | 9695 | segment->chrhigh,segment->chrlength)) != NULL) { |
10702 | 9696 | debug4e(printf("=> %s antiacceptor : known at %d (%d mismatches)\n", |
10703 | plusp == true ? "plus" : "minus",Substring_chimera_pos(hit),nmismatches)); | |
9697 | plusp == true ? "plus" : "minus",Substring_siteA_pos(hit),nmismatches)); | |
10704 | 9698 | (*shortend_antiacceptors)[nmismatches] = List_push((*shortend_antiacceptors)[nmismatches],(void *) hit); |
10705 | 9699 | } |
10706 | 9700 | } |
10772 | 9766 | sensedir,segment->chrnum,segment->chroffset, |
10773 | 9767 | segment->chrhigh,segment->chrlength)) != NULL) { |
10774 | 9768 | debug4e(printf("=> %s acceptor: known at %d (%d mismatches)\n", |
10775 | plusp == true ? "plus" : "minus",Substring_chimera_pos(hit),nmismatches)); | |
9769 | plusp == true ? "plus" : "minus",Substring_siteA_pos(hit),nmismatches)); | |
10776 | 9770 | (*shortend_acceptors)[nmismatches] = List_push((*shortend_acceptors)[nmismatches],(void *) hit); |
10777 | 9771 | } |
10778 | 9772 | |
10789 | 9783 | sensedir,segment->chrnum,segment->chroffset, |
10790 | 9784 | segment->chrhigh,segment->chrlength)) != NULL) { |
10791 | 9785 | debug4e(printf("=> %s antidonor: known at %d (%d mismatches)\n", |
10792 | plusp == true ? "plus" : "minus",Substring_chimera_pos(hit),nmismatches)); | |
9786 | plusp == true ? "plus" : "minus",Substring_siteD_pos(hit),nmismatches)); | |
10793 | 9787 | (*shortend_antidonors)[nmismatches] = List_push((*shortend_antidonors)[nmismatches],(void *) hit); |
10794 | 9788 | } |
10795 | 9789 | } |
10799 | 9793 | } |
10800 | 9794 | } |
10801 | 9795 | |
9796 | #ifdef HAVE_ALLOCA | |
9797 | if (querylength <= MAX_STACK_READLENGTH) { | |
9798 | FREEA(mismatch_positions); | |
9799 | } else { | |
9800 | FREE(mismatch_positions); | |
9801 | } | |
9802 | #else | |
9803 | FREE(mismatch_positions); | |
9804 | #endif | |
9805 | ||
10802 | 9806 | return; |
10803 | 9807 | } |
10804 | 9808 | |
10826 | 9830 | int *floors_from_neg3, *floors_to_pos3; |
10827 | 9831 | |
10828 | 9832 | int splice_pos_start, splice_pos_end; |
9833 | int *mismatch_positions; | |
10829 | 9834 | |
10830 | 9835 | #ifdef HAVE_ALLOCA |
10831 | int *mismatch_positions = (int *) ALLOCA((querylength+1)*sizeof(int)); | |
10832 | #else | |
10833 | int mismatch_positions[MAX_READLENGTH+1]; | |
10834 | #endif | |
10835 | ||
9836 | if (querylength <= MAX_STACK_READLENGTH) { | |
9837 | mismatch_positions = (int *) ALLOCA((querylength+1)*sizeof(int)); | |
9838 | } else { | |
9839 | mismatch_positions = (int *) MALLOC((querylength+1)*sizeof(int)); | |
9840 | } | |
9841 | #else | |
9842 | mismatch_positions = (int *) MALLOC((querylength+1)*sizeof(int)); | |
9843 | #endif | |
10836 | 9844 | |
10837 | 9845 | debug4e(printf("Entering find_spliceends_distant_dna with %d anchor segments\n",nanchors)); |
10838 | 9846 | |
10900 | 9908 | querylength,/*plusp*/true,genestrand, |
10901 | 9909 | segment->chrnum,segment->chroffset, |
10902 | 9910 | segment->chrhigh,segment->chrlength)) != NULL) { |
10903 | debug4e(printf("=> plus startfrag: at %d (%d mismatches)\n",Substring_chimera_pos(hit),nmismatches)); | |
9911 | debug4e(printf("=> plus startfrag: at %d (%d mismatches)\n",Substring_siteN_pos(hit),nmismatches)); | |
10904 | 9912 | debug4e(printf("q: %s\ng: %s\n",queryptr,gbuffer)); |
10905 | 9913 | (*distant_startfrags)[nmismatches] = List_push((*distant_startfrags)[nmismatches],(void *) hit); |
10906 | 9914 | } |
10956 | 9964 | querylength,/*plusp*/true,genestrand, |
10957 | 9965 | segment->chrnum,segment->chroffset, |
10958 | 9966 | segment->chrhigh,segment->chrlength)) != NULL) { |
10959 | debug4e(printf("=> plus endfrag: at %d (%d mismatches)\n",Substring_chimera_pos(hit),nmismatches)); | |
9967 | debug4e(printf("=> plus endfrag: at %d (%d mismatches)\n",Substring_siteN_pos(hit),nmismatches)); | |
10960 | 9968 | debug4e(printf("q: %s\ng: %s\n",queryptr,gbuffer)); |
10961 | 9969 | (*distant_endfrags)[nmismatches] = List_push((*distant_endfrags)[nmismatches],(void *) hit); |
10962 | 9970 | } |
10971 | 9979 | } |
10972 | 9980 | } |
10973 | 9981 | } |
9982 | ||
9983 | #ifdef HAVE_ALLOCA | |
9984 | if (querylength <= MAX_STACK_READLENGTH) { | |
9985 | FREEA(mismatch_positions); | |
9986 | } else { | |
9987 | FREE(mismatch_positions); | |
9988 | } | |
9989 | #else | |
9990 | FREE(mismatch_positions); | |
9991 | #endif | |
10974 | 9992 | |
10975 | 9993 | return; |
10976 | 9994 | } |
10999 | 10017 | int *floors_from_neg3, *floors_to_pos3; |
11000 | 10018 | |
11001 | 10019 | int splice_pos_start, splice_pos_end; |
10020 | int *mismatch_positions; | |
11002 | 10021 | |
11003 | 10022 | #ifdef HAVE_ALLOCA |
11004 | int *mismatch_positions = (int *) ALLOCA((querylength+1)*sizeof(int)); | |
11005 | #else | |
11006 | int mismatch_positions[MAX_READLENGTH+1]; | |
10023 | if (querylength <= MAX_STACK_READLENGTH) { | |
10024 | mismatch_positions = (int *) ALLOCA((querylength+1)*sizeof(int)); | |
10025 | } else { | |
10026 | mismatch_positions = (int *) MALLOC((querylength+1)*sizeof(int)); | |
10027 | } | |
10028 | #else | |
10029 | mismatch_positions = (int *) MALLOC((querylength+1)*sizeof(int)); | |
11007 | 10030 | #endif |
11008 | 10031 | |
11009 | 10032 | |
11073 | 10096 | querylength,/*plusp*/false,genestrand, |
11074 | 10097 | segment->chrnum,segment->chroffset, |
11075 | 10098 | segment->chrhigh,segment->chrlength)) != NULL) { |
11076 | debug4e(printf("=> minus endfrag: at %d (%d mismatches)\n",Substring_chimera_pos(hit),nmismatches)); | |
10099 | debug4e(printf("=> minus endfrag: at %d (%d mismatches)\n",Substring_siteN_pos(hit),nmismatches)); | |
11077 | 10100 | debug4e(printf("q: %s\ng: %s\n",queryptr,gbuffer)); |
11078 | 10101 | (*distant_endfrags)[nmismatches] = List_push((*distant_endfrags)[nmismatches],(void *) hit); |
11079 | 10102 | } |
11129 | 10152 | querylength,/*plusp*/false,genestrand, |
11130 | 10153 | segment->chrnum,segment->chroffset, |
11131 | 10154 | segment->chrhigh,segment->chrlength)) != NULL) { |
11132 | debug4e(printf("=> minus startfrag: at %d (%d mismatches)\n",Substring_chimera_pos(hit),nmismatches)); | |
10155 | debug4e(printf("=> minus startfrag: at %d (%d mismatches)\n",Substring_siteN_pos(hit),nmismatches)); | |
11133 | 10156 | debug4e(printf("q: %s\ng: %s\n",queryptr,gbuffer)); |
11134 | 10157 | (*distant_startfrags)[nmismatches] = List_push((*distant_startfrags)[nmismatches],(void *) hit); |
11135 | 10158 | } |
11143 | 10166 | } |
11144 | 10167 | } |
11145 | 10168 | } |
10169 | ||
10170 | #ifdef HAVE_ALLOCA | |
10171 | if (querylength <= MAX_STACK_READLENGTH) { | |
10172 | FREEA(mismatch_positions); | |
10173 | } else { | |
10174 | FREE(mismatch_positions); | |
10175 | } | |
10176 | #else | |
10177 | FREE(mismatch_positions); | |
10178 | #endif | |
11146 | 10179 | |
11147 | 10180 | return; |
11148 | 10181 | } |
11179 | 10212 | int sensedir; |
11180 | 10213 | |
11181 | 10214 | int splice_pos_start, splice_pos_end; |
11182 | ||
11183 | #ifdef HAVE_ALLOCA | |
11184 | int *mismatch_positions = (int *) ALLOCA((querylength+1)*sizeof(int)); | |
11185 | int *segment_donor_knownpos = (int *) ALLOCA((querylength+1)*sizeof(int)); | |
11186 | int *segment_acceptor_knownpos = (int *) ALLOCA((querylength+1)*sizeof(int)); | |
11187 | int *segment_antidonor_knownpos = (int *) ALLOCA((querylength+1)*sizeof(int)); | |
11188 | int *segment_antiacceptor_knownpos = (int *) ALLOCA((querylength+1)*sizeof(int)); | |
11189 | int *segment_donor_knowni = (int *) ALLOCA((querylength+1)*sizeof(int)); | |
11190 | int *segment_acceptor_knowni = (int *) ALLOCA((querylength+1)*sizeof(int)); | |
11191 | int *segment_antidonor_knowni = (int *) ALLOCA((querylength+1)*sizeof(int)); | |
11192 | int *segment_antiacceptor_knowni = (int *) ALLOCA((querylength+1)*sizeof(int)); | |
11193 | int *positions_alloc = (int *) ALLOCA((querylength+1)*sizeof(int)); | |
11194 | int *knowni_alloc = (int *) ALLOCA((querylength+1)*sizeof(int)); | |
11195 | #else | |
11196 | int mismatch_positions[MAX_READLENGTH+1]; | |
11197 | int segment_donor_knownpos[MAX_READLENGTH+1], segment_acceptor_knownpos[MAX_READLENGTH+1]; | |
11198 | int segment_antidonor_knownpos[MAX_READLENGTH+1], segment_antiacceptor_knownpos[MAX_READLENGTH+1]; | |
11199 | int segment_donor_knowni[MAX_READLENGTH+1], segment_acceptor_knowni[MAX_READLENGTH+1]; | |
11200 | int segment_antidonor_knowni[MAX_READLENGTH+1], segment_antiacceptor_knowni[MAX_READLENGTH+1]; | |
11201 | int positions_alloc[MAX_READLENGTH+1]; | |
11202 | int knowni_alloc[MAX_READLENGTH+1]; | |
11203 | #endif | |
11204 | ||
11205 | 10215 | int segment_donor_nknown, segment_acceptor_nknown, segment_antidonor_nknown, segment_antiacceptor_nknown; |
11206 | 10216 | int donori_nsites, acceptorj_nsites, antiacceptori_nsites, antidonorj_nsites; |
11207 | 10217 | int *donori_positions, *acceptorj_positions, *antiacceptori_positions, *antidonorj_positions; |
11208 | 10218 | int *donori_knowni, *acceptorj_knowni, *antiacceptori_knowni, *antidonorj_knowni; |
11209 | 10219 | |
10220 | int *mismatch_positions; | |
10221 | int *segment_donor_knownpos, *segment_acceptor_knownpos, *segment_antidonor_knownpos, *segment_antiacceptor_knownpos, | |
10222 | *segment_donor_knowni, *segment_acceptor_knowni, *segment_antidonor_knowni, *segment_antiacceptor_knowni; | |
10223 | int *positions_alloc, *knowni_alloc; | |
10224 | ||
10225 | #ifdef HAVE_ALLOCA | |
10226 | if (querylength <= MAX_STACK_READLENGTH) { | |
10227 | mismatch_positions = (int *) ALLOCA((querylength+1)*sizeof(int)); | |
10228 | segment_donor_knownpos = (int *) ALLOCA((querylength+1)*sizeof(int)); | |
10229 | segment_acceptor_knownpos = (int *) ALLOCA((querylength+1)*sizeof(int)); | |
10230 | segment_antidonor_knownpos = (int *) ALLOCA((querylength+1)*sizeof(int)); | |
10231 | segment_antiacceptor_knownpos = (int *) ALLOCA((querylength+1)*sizeof(int)); | |
10232 | segment_donor_knowni = (int *) ALLOCA((querylength+1)*sizeof(int)); | |
10233 | segment_acceptor_knowni = (int *) ALLOCA((querylength+1)*sizeof(int)); | |
10234 | segment_antidonor_knowni = (int *) ALLOCA((querylength+1)*sizeof(int)); | |
10235 | segment_antiacceptor_knowni = (int *) ALLOCA((querylength+1)*sizeof(int)); | |
10236 | positions_alloc = (int *) ALLOCA((querylength+1)*sizeof(int)); | |
10237 | knowni_alloc = (int *) ALLOCA((querylength+1)*sizeof(int)); | |
10238 | } else { | |
10239 | mismatch_positions = (int *) MALLOC((querylength+1)*sizeof(int)); | |
10240 | segment_donor_knownpos = (int *) MALLOC((querylength+1)*sizeof(int)); | |
10241 | segment_acceptor_knownpos = (int *) MALLOC((querylength+1)*sizeof(int)); | |
10242 | segment_antidonor_knownpos = (int *) MALLOC((querylength+1)*sizeof(int)); | |
10243 | segment_antiacceptor_knownpos = (int *) MALLOC((querylength+1)*sizeof(int)); | |
10244 | segment_donor_knowni = (int *) MALLOC((querylength+1)*sizeof(int)); | |
10245 | segment_acceptor_knowni = (int *) MALLOC((querylength+1)*sizeof(int)); | |
10246 | segment_antidonor_knowni = (int *) MALLOC((querylength+1)*sizeof(int)); | |
10247 | segment_antiacceptor_knowni = (int *) MALLOC((querylength+1)*sizeof(int)); | |
10248 | positions_alloc = (int *) MALLOC((querylength+1)*sizeof(int)); | |
10249 | knowni_alloc = (int *) MALLOC((querylength+1)*sizeof(int)); | |
10250 | } | |
10251 | #else | |
10252 | mismatch_positions = (int *) MALLOC((querylength+1)*sizeof(int)); | |
10253 | segment_donor_knownpos = (int *) MALLOC((querylength+1)*sizeof(int)); | |
10254 | segment_acceptor_knownpos = (int *) MALLOC((querylength+1)*sizeof(int)); | |
10255 | segment_antidonor_knownpos = (int *) MALLOC((querylength+1)*sizeof(int)); | |
10256 | segment_antiacceptor_knownpos = (int *) MALLOC((querylength+1)*sizeof(int)); | |
10257 | segment_donor_knowni = (int *) MALLOC((querylength+1)*sizeof(int)); | |
10258 | segment_acceptor_knowni = (int *) MALLOC((querylength+1)*sizeof(int)); | |
10259 | segment_antidonor_knowni = (int *) MALLOC((querylength+1)*sizeof(int)); | |
10260 | segment_antiacceptor_knowni = (int *) MALLOC((querylength+1)*sizeof(int)); | |
10261 | positions_alloc = (int *) MALLOC((querylength+1)*sizeof(int)); | |
10262 | knowni_alloc = (int *) MALLOC((querylength+1)*sizeof(int)); | |
10263 | #endif | |
11210 | 10264 | |
11211 | 10265 | debug4e(printf("Entering find_spliceends_distant_rna with %d anchor segments\n",nanchors)); |
11212 | 10266 | |
11219 | 10273 | assert(segment->diagonal != (Univcoord_T) -1); |
11220 | 10274 | |
11221 | 10275 | segment_left = segment->diagonal - querylength; /* FORMULA: Corresponds to querypos 0 */ |
11222 | last_querypos = segment->querypos3 + index1part; | |
10276 | if ((first_querypos = segment->querypos5 - (index1interval - 1)) < 0) { | |
10277 | first_querypos = 0; | |
10278 | } | |
10279 | if ((last_querypos = segment->querypos3 + index1part + (index1interval - 1)) > querylength) { | |
10280 | last_querypos = querylength; | |
10281 | } | |
11223 | 10282 | |
11224 | 10283 | debug4e(printf("find_spliceends_distant_rna: Checking up to %d mismatches at diagonal %llu (querypos %d..%d) - querylength %d = %llu, floors %d and %d, plusp %d\n", |
11225 | 10284 | max_mismatches_allowed,(unsigned long long) segment->diagonal, |
11238 | 10297 | /* Find splices on genomic right */ |
11239 | 10298 | if (plusp) { |
11240 | 10299 | /* ? require that floors_from_neg3[segment->querypos5] <= max_mismatches_allowed */ |
11241 | if (segment->querypos5 < index1part && last_querypos < query_lastpos) { | |
10300 | if (first_querypos < index1part && last_querypos < query_lastpos) { | |
11242 | 10301 | /* genomic left anchor */ |
11243 | 10302 | debug4e(printf("Searching genomic right: plus genomic left anchor\n")); |
11244 | 10303 | nmismatches_left = Genome_mismatches_left(mismatch_positions,max_mismatches_allowed, |
11246 | 10305 | #if 0 |
11247 | 10306 | /*pos5*/0,/*pos3*/querylength, |
11248 | 10307 | #else |
11249 | /*pos5 (was 0)*/segment->querypos5,/*pos3*/querylength, | |
10308 | /*pos5 (was 0)*/first_querypos,/*pos3*/querylength, | |
11250 | 10309 | #endif |
11251 | 10310 | plusp,genestrand); |
11252 | 10311 | debug4e( |
11261 | 10320 | #if 0 |
11262 | 10321 | splice_pos_start = index1part; |
11263 | 10322 | #else |
11264 | splice_pos_start = segment->querypos5; | |
10323 | splice_pos_start = first_querypos; | |
11265 | 10324 | #endif |
11266 | 10325 | if (nmismatches_left <= max_mismatches_allowed) { |
11267 | 10326 | splice_pos_end = querylength - 1; |
11269 | 10328 | splice_pos_end = querylength - 1; |
11270 | 10329 | } |
11271 | 10330 | |
11272 | } else if (segment->querypos5 > index1part && last_querypos > query_lastpos) { | |
10331 | } else if (first_querypos > index1part && last_querypos > query_lastpos) { | |
11273 | 10332 | /* genomic right anchor. No need to find splices on genomic right */ |
11274 | 10333 | debug4e(printf("Searching genomic right: plus genomic right anchor\n")); |
11275 | 10334 | splice_pos_start = querylength; |
11276 | 10335 | splice_pos_end = 0; |
11277 | 10336 | |
11278 | } else if (segment->querypos5 > index1part && last_querypos < query_lastpos && | |
10337 | } else if (first_querypos > index1part && last_querypos < query_lastpos && | |
11279 | 10338 | segment->spliceable_low_p == true) { |
11280 | 10339 | /* middle anchor */ |
11281 | 10340 | debug4e(printf("Searching genomic right: plus middle anchor\n")); |
11282 | 10341 | nmismatches_left = Genome_mismatches_left(mismatch_positions,max_mismatches_allowed, |
11283 | 10342 | query_compress,/*left*/segment_left, |
11284 | /*pos5*/segment->querypos5,/*pos3*/querylength, | |
10343 | /*pos5*/first_querypos,/*pos3*/querylength, | |
11285 | 10344 | plusp,genestrand); |
11286 | 10345 | debug4e( |
11287 | 10346 | printf("%d mismatches on left (%d allowed) at:", |
11292 | 10351 | printf("\n"); |
11293 | 10352 | ); |
11294 | 10353 | |
11295 | splice_pos_start = segment->querypos5; | |
10354 | splice_pos_start = first_querypos; | |
11296 | 10355 | if (nmismatches_left <= max_mismatches_allowed) { |
11297 | 10356 | splice_pos_end = querylength - 1; |
11298 | 10357 | } else if ((splice_pos_end = mismatch_positions[nmismatches_left-1]) > querylength - 1) { |
11308 | 10367 | |
11309 | 10368 | } else { |
11310 | 10369 | /* ? require that floors_to_pos3[segment->querypos3] <= max_mismatches_allowed */ |
11311 | if (segment->querypos5 < index1part && last_querypos < query_lastpos) { | |
10370 | if (first_querypos < index1part && last_querypos < query_lastpos) { | |
11312 | 10371 | /* genomic right anchor. No need to find splices on genomic right */ |
11313 | 10372 | debug4e(printf("Searching genomic right: minus genomic right anchor\n")); |
11314 | 10373 | splice_pos_start = querylength; |
11315 | 10374 | splice_pos_end = 0; |
11316 | 10375 | |
11317 | } else if (segment->querypos5 > index1part && last_querypos > query_lastpos) { | |
10376 | } else if (first_querypos > index1part && last_querypos > query_lastpos) { | |
11318 | 10377 | /* genomic left anchor */ |
11319 | 10378 | debug4e(printf("Searching genomic right: minus genomic left anchor\n")); |
11320 | 10379 | nmismatches_left = Genome_mismatches_left(mismatch_positions,max_mismatches_allowed, |
11346 | 10405 | splice_pos_end = querylength - 1; |
11347 | 10406 | } |
11348 | 10407 | |
11349 | } else if (segment->querypos5 > index1part && last_querypos < query_lastpos && | |
10408 | } else if (first_querypos > index1part && last_querypos < query_lastpos && | |
11350 | 10409 | segment->spliceable_low_p == true) { |
11351 | 10410 | /* middle anchor */ |
11352 | 10411 | debug4e(printf("Searching genomic right: minus middle anchor\n")); |
11353 | 10412 | nmismatches_left = Genome_mismatches_left(mismatch_positions,max_mismatches_allowed, |
11354 | 10413 | query_compress,/*left*/segment_left, |
11355 | /*pos5*/querylength - segment->querypos3 - index1part, | |
10414 | /*pos5*/querylength - last_querypos, | |
11356 | 10415 | /*pos3*/querylength,plusp,genestrand); |
11357 | 10416 | debug4e( |
11358 | 10417 | printf("%d mismatches on left (%d allowed) at:", |
11441 | 10500 | (unsigned long long) segment_left,splice_pos,nmismatches,splice_pos_end)); |
11442 | 10501 | |
11443 | 10502 | if ((hit = Substring_new_donor(/*donor_coord*/segment_left + splice_pos,/*donor_knowni*/donori_knowni[i], |
11444 | splice_pos,/*substring_querystart*/segment->querypos5, | |
10503 | splice_pos,/*substring_querystart*/first_querypos, | |
11445 | 10504 | /*substring_queryend*/last_querypos, |
11446 | 10505 | nmismatches,/*prob*/2.0,/*left*/segment_left,query_compress, |
11447 | 10506 | querylength,plusp,genestrand, |
11449 | 10508 | segment->chrhigh,segment->chrlength)) != NULL) { |
11450 | 10509 | debug4e(printf("=> %s donor: %f at %d (%d mismatches) %d..%d\n", |
11451 | 10510 | plusp == true ? "plus" : "minus",Maxent_hr_donor_prob(segment_left + splice_pos,segment->chroffset), |
11452 | Substring_chimera_pos(hit),nmismatches,Substring_querystart(hit),Substring_queryend(hit))); | |
10511 | Substring_siteD_pos(hit),nmismatches,Substring_querystart(hit),Substring_queryend(hit))); | |
11453 | 10512 | debug4e(printf("q: %s\ng: %s\n",queryptr,gbuffer)); |
11454 | 10513 | (*distant_donors)[nmismatches] = List_push((*distant_donors)[nmismatches],(void *) hit); |
11455 | 10514 | } |
11462 | 10521 | debug4e(printf("Novel donor for segment at %llu, splice_pos %d (%d mismatches), stopi = %d\n", |
11463 | 10522 | (unsigned long long) segment_left,splice_pos,nmismatches,splice_pos_end)); |
11464 | 10523 | if ((hit = Substring_new_donor(/*donor_coord*/segment_left + splice_pos,/*donor_knowni*/-1, |
11465 | splice_pos,/*substring_querystart*/segment->querypos5, | |
10524 | splice_pos,/*substring_querystart*/first_querypos, | |
11466 | 10525 | /*substring_queryend*/last_querypos, |
11467 | 10526 | nmismatches,prob,/*left*/segment_left,query_compress, |
11468 | 10527 | querylength,plusp,genestrand, |
11469 | 10528 | sensedir,segment->chrnum,segment->chroffset, |
11470 | 10529 | segment->chrhigh,segment->chrlength)) != NULL) { |
11471 | 10530 | debug4e(printf("=> %s donor: %f at %d (%d mismatches) %d..%d\n", |
11472 | plusp == true ? "plus" : "minus",prob,Substring_chimera_pos(hit),nmismatches, | |
10531 | plusp == true ? "plus" : "minus",prob,Substring_siteD_pos(hit),nmismatches, | |
11473 | 10532 | Substring_querystart(hit),Substring_queryend(hit))); |
11474 | 10533 | debug4e(printf("q: %s\ng: %s\n",queryptr,gbuffer)); |
11475 | 10534 | (*distant_donors)[nmismatches] = List_push((*distant_donors)[nmismatches],(void *) hit); |
11522 | 10581 | debug4e(printf("Known antiacceptor for segment at %llu, splice_pos %d (%d mismatches), stopi = %d\n", |
11523 | 10582 | (unsigned long long) segment_left,splice_pos,nmismatches,splice_pos_end)); |
11524 | 10583 | if ((hit = Substring_new_acceptor(/*acceptor_coord*/segment_left + splice_pos,/*acceptor_knowni*/antiacceptori_knowni[i], |
11525 | splice_pos,/*substring_querystart*/segment->querypos5, | |
10584 | splice_pos,/*substring_querystart*/first_querypos, | |
11526 | 10585 | /*substring_queryend*/last_querypos, |
11527 | 10586 | nmismatches,/*prob*/2.0,/*left*/segment_left,query_compress, |
11528 | 10587 | querylength,plusp,genestrand, |
11530 | 10589 | segment->chrhigh,segment->chrlength)) != NULL) { |
11531 | 10590 | debug4e(printf("=> %s antiacceptor : %f at %d (%d mismatches) %d..%d\n", |
11532 | 10591 | plusp == true ? "plus" : "minus",Maxent_hr_antiacceptor_prob(segment_left + splice_pos,segment->chroffset), |
11533 | Substring_chimera_pos(hit),nmismatches,Substring_querystart(hit),Substring_queryend(hit))); | |
10592 | Substring_siteA_pos(hit),nmismatches,Substring_querystart(hit),Substring_queryend(hit))); | |
11534 | 10593 | debug4e(printf("q: %s\ng: %s\n",queryptr,gbuffer)); |
11535 | 10594 | (*distant_antiacceptors)[nmismatches] = List_push((*distant_antiacceptors)[nmismatches],(void *) hit); |
11536 | 10595 | } |
11543 | 10602 | debug4e(printf("Novel antiacceptor for segment at %llu, splice_pos %d (%d mismatches), stopi = %d\n", |
11544 | 10603 | (unsigned long long) segment_left,splice_pos,nmismatches,splice_pos_end)); |
11545 | 10604 | if ((hit = Substring_new_acceptor(/*acceptor_coord*/segment_left + splice_pos,/*acceptor_knowni*/-1, |
11546 | splice_pos,/*substring_querystart*/segment->querypos5, | |
10605 | splice_pos,/*substring_querystart*/first_querypos, | |
11547 | 10606 | /*substring_queryend*/last_querypos, |
11548 | 10607 | nmismatches,prob,/*left*/segment_left,query_compress, |
11549 | 10608 | querylength,plusp,genestrand, |
11550 | 10609 | sensedir,segment->chrnum,segment->chroffset, |
11551 | 10610 | segment->chrhigh,segment->chrlength)) != NULL) { |
11552 | 10611 | debug4e(printf("=> %s antiacceptor : %f at %d (%d mismatches) %d..%d\n", |
11553 | plusp == true ? "plus" : "minus",prob,Substring_chimera_pos(hit),nmismatches, | |
10612 | plusp == true ? "plus" : "minus",prob,Substring_siteA_pos(hit),nmismatches, | |
11554 | 10613 | Substring_querystart(hit),Substring_queryend(hit))); |
11555 | 10614 | debug4e(printf("q: %s\ng: %s\n",queryptr,gbuffer)); |
11556 | 10615 | (*distant_antiacceptors)[nmismatches] = List_push((*distant_antiacceptors)[nmismatches],(void *) hit); |
11567 | 10626 | /* Find splices on genomic left */ |
11568 | 10627 | if (plusp) { |
11569 | 10628 | /* ? require that floors_to_pos3[segment->querypos3] <= max_mismatches_allowed */ |
11570 | if (segment->querypos5 < index1part && last_querypos < query_lastpos) { | |
10629 | if (first_querypos < index1part && last_querypos < query_lastpos) { | |
11571 | 10630 | /* genomic left anchor. No need to find splices on genomic left. */ |
11572 | 10631 | debug4e(printf("Searching genomic left: plus genomic left anchor\n")); |
11573 | 10632 | splice_pos_start = querylength; |
11574 | 10633 | splice_pos_end = 0; |
11575 | 10634 | |
11576 | } else if (segment->querypos5 > index1part && last_querypos > query_lastpos) { | |
10635 | } else if (first_querypos > index1part && last_querypos > query_lastpos) { | |
11577 | 10636 | /* genomic right anchor */ |
11578 | 10637 | debug4e(printf("Searching genomic left: plus genomic right anchor\n")); |
11579 | 10638 | nmismatches_right = Genome_mismatches_right(mismatch_positions,max_mismatches_allowed, |
11603 | 10662 | splice_pos_start = 1; |
11604 | 10663 | } |
11605 | 10664 | |
11606 | } else if (segment->querypos5 > index1part && last_querypos < query_lastpos && | |
10665 | } else if (first_querypos > index1part && last_querypos < query_lastpos && | |
11607 | 10666 | segment->spliceable_high_p == true) { |
11608 | 10667 | /* middle anchor */ |
11609 | 10668 | debug4e(printf("Searching genomic left: plus middle anchor\n")); |
11635 | 10694 | |
11636 | 10695 | } else { |
11637 | 10696 | /* ? require that floors_from_neg3[segment->querypos5] <= max_mismatches_allowed */ |
11638 | if (segment->querypos5 < index1part && last_querypos < query_lastpos) { | |
10697 | if (first_querypos < index1part && last_querypos < query_lastpos) { | |
11639 | 10698 | /* genomic right anchor */ |
11640 | 10699 | debug4e(printf("Searching genomic left: minus genomic right anchor\n")); |
11641 | 10700 | nmismatches_right = Genome_mismatches_right(mismatch_positions,max_mismatches_allowed, |
11643 | 10702 | #if 0 |
11644 | 10703 | /*pos5*/0,/*pos3*/querylength, |
11645 | 10704 | #else |
11646 | /*pos5*/0,/*pos3 (was querylength)*/querylength - segment->querypos5, | |
10705 | /*pos5*/0,/*pos3 (was querylength)*/querylength - first_querypos, | |
11647 | 10706 | #endif |
11648 | 10707 | plusp,genestrand); |
11649 | 10708 | debug4e( |
11657 | 10716 | #if 0 |
11658 | 10717 | splice_pos_end = query_lastpos; |
11659 | 10718 | #else |
11660 | splice_pos_end = querylength - segment->querypos5; | |
10719 | splice_pos_end = querylength - first_querypos; | |
11661 | 10720 | #endif |
11662 | 10721 | if (nmismatches_right <= max_mismatches_allowed) { |
11663 | 10722 | splice_pos_start = 1; |
11665 | 10724 | splice_pos_start = 1; |
11666 | 10725 | } |
11667 | 10726 | |
11668 | } else if (segment->querypos5 > index1part && last_querypos > query_lastpos) { | |
10727 | } else if (first_querypos > index1part && last_querypos > query_lastpos) { | |
11669 | 10728 | /* genomic left anchor. No need to find splices on genomic left. */ |
11670 | 10729 | debug4e(printf("Searching genomic left: minus genomic left anchor\n")); |
11671 | 10730 | splice_pos_start = querylength; |
11672 | 10731 | splice_pos_end = 0; |
11673 | 10732 | |
11674 | } else if (segment->querypos5 > index1part && last_querypos < query_lastpos && | |
10733 | } else if (first_querypos > index1part && last_querypos < query_lastpos && | |
11675 | 10734 | segment->spliceable_high_p == true) { |
11676 | 10735 | /* middle anchor */ |
11677 | 10736 | debug4e(printf("Searching genomic left: minus middle anchor\n")); |
11678 | 10737 | nmismatches_right = Genome_mismatches_right(mismatch_positions,max_mismatches_allowed, |
11679 | 10738 | query_compress,/*left*/segment_left, |
11680 | /*pos5*/0,/*pos3*/querylength - segment->querypos5, | |
10739 | /*pos5*/0,/*pos3*/querylength - first_querypos, | |
11681 | 10740 | plusp,genestrand); |
11682 | 10741 | debug4e( |
11683 | 10742 | printf("%d mismatches on right (%d allowed) at:",nmismatches_right,max_mismatches_allowed); |
11687 | 10746 | printf("\n"); |
11688 | 10747 | ); |
11689 | 10748 | |
11690 | splice_pos_end = querylength - segment->querypos5; | |
10749 | splice_pos_end = querylength - first_querypos; | |
11691 | 10750 | if (nmismatches_right <= max_mismatches_allowed) { |
11692 | 10751 | splice_pos_start = 1; |
11693 | 10752 | } else if ((splice_pos_start = mismatch_positions[nmismatches_right-1]) < 1) { |
11773 | 10832 | segment->chrhigh,segment->chrlength)) != NULL) { |
11774 | 10833 | debug4e(printf("=> %s acceptor: %f at %d (%d mismatches) %d..%d\n", |
11775 | 10834 | plusp == true ? "plus" : "minus",Maxent_hr_acceptor_prob(segment_left + splice_pos,segment->chroffset), |
11776 | Substring_chimera_pos(hit),nmismatches,Substring_querystart(hit),Substring_queryend(hit))); | |
10835 | Substring_siteA_pos(hit),nmismatches,Substring_querystart(hit),Substring_queryend(hit))); | |
11777 | 10836 | debug4e(printf("q: %s\ng: %s\n",queryptr,gbuffer)); |
11778 | 10837 | (*distant_acceptors)[nmismatches] = List_push((*distant_acceptors)[nmismatches],(void *) hit); |
11779 | 10838 | } |
11793 | 10852 | sensedir,segment->chrnum,segment->chroffset, |
11794 | 10853 | segment->chrhigh,segment->chrlength)) != NULL) { |
11795 | 10854 | debug4e(printf("=> %s acceptor: %f at %d (%d mismatches) %d..%d\n", |
11796 | plusp == true ? "plus" : "minus",prob,Substring_chimera_pos(hit),nmismatches, | |
10855 | plusp == true ? "plus" : "minus",prob,Substring_siteA_pos(hit),nmismatches, | |
11797 | 10856 | Substring_querystart(hit),Substring_queryend(hit))); |
11798 | 10857 | debug4e(printf("q: %s\ng: %s\n",queryptr,gbuffer)); |
11799 | 10858 | (*distant_acceptors)[nmismatches] = List_push((*distant_acceptors)[nmismatches],(void *) hit); |
11854 | 10913 | segment->chrhigh,segment->chrlength)) != NULL) { |
11855 | 10914 | debug4e(printf("=> %s antidonor: %f at %d (%d mismatches) %d..%d\n", |
11856 | 10915 | plusp == true ? "plus" : "minus",Maxent_hr_antidonor_prob(segment_left + splice_pos,segment->chroffset), |
11857 | Substring_chimera_pos(hit),nmismatches,Substring_querystart(hit),Substring_queryend(hit))); | |
10916 | Substring_siteD_pos(hit),nmismatches,Substring_querystart(hit),Substring_queryend(hit))); | |
11858 | 10917 | debug4e(printf("q: %s\ng: %s\n",queryptr,gbuffer)); |
11859 | 10918 | (*distant_antidonors)[nmismatches] = List_push((*distant_antidonors)[nmismatches],(void *) hit); |
11860 | 10919 | } |
11874 | 10933 | sensedir,segment->chrnum,segment->chroffset, |
11875 | 10934 | segment->chrhigh,segment->chrlength)) != NULL) { |
11876 | 10935 | debug4e(printf("=> %s antidonor: %f at %d (%d mismatches) %d..%d\n", |
11877 | plusp == true ? "plus" : "minus",prob,Substring_chimera_pos(hit),nmismatches, | |
10936 | plusp == true ? "plus" : "minus",prob,Substring_siteD_pos(hit),nmismatches, | |
11878 | 10937 | Substring_querystart(hit),Substring_queryend(hit))); |
11879 | 10938 | debug4e(printf("q: %s\ng: %s\n",queryptr,gbuffer)); |
11880 | 10939 | (*distant_antidonors)[nmismatches] = List_push((*distant_antidonors)[nmismatches],(void *) hit); |
11888 | 10947 | } |
11889 | 10948 | } |
11890 | 10949 | } |
10950 | ||
10951 | #ifdef HAVE_ALLOCA | |
10952 | if (querylength <= MAX_STACK_READLENGTH) { | |
10953 | FREEA(mismatch_positions); | |
10954 | FREEA(segment_donor_knownpos); | |
10955 | FREEA(segment_acceptor_knownpos); | |
10956 | FREEA(segment_antidonor_knownpos); | |
10957 | FREEA(segment_antiacceptor_knownpos); | |
10958 | FREEA(segment_donor_knowni); | |
10959 | FREEA(segment_acceptor_knowni); | |
10960 | FREEA(segment_antidonor_knowni); | |
10961 | FREEA(segment_antiacceptor_knowni); | |
10962 | FREEA(positions_alloc); | |
10963 | FREEA(knowni_alloc); | |
10964 | } else { | |
10965 | FREE(mismatch_positions); | |
10966 | FREE(segment_donor_knownpos); | |
10967 | FREE(segment_acceptor_knownpos); | |
10968 | FREE(segment_antidonor_knownpos); | |
10969 | FREE(segment_antiacceptor_knownpos); | |
10970 | FREE(segment_donor_knowni); | |
10971 | FREE(segment_acceptor_knowni); | |
10972 | FREE(segment_antidonor_knowni); | |
10973 | FREE(segment_antiacceptor_knowni); | |
10974 | FREE(positions_alloc); | |
10975 | FREE(knowni_alloc); | |
10976 | #else | |
10977 | FREE(mismatch_positions); | |
10978 | FREE(segment_donor_knownpos); | |
10979 | FREE(segment_acceptor_knownpos); | |
10980 | FREE(segment_antidonor_knownpos); | |
10981 | FREE(segment_antiacceptor_knownpos); | |
10982 | FREE(segment_donor_knowni); | |
10983 | FREE(segment_acceptor_knowni); | |
10984 | FREE(segment_antidonor_knowni); | |
10985 | FREE(segment_antiacceptor_knowni); | |
10986 | FREE(positions_alloc); | |
10987 | FREE(knowni_alloc); | |
10988 | #endif | |
11891 | 10989 | |
11892 | 10990 | return; |
11893 | 10991 | } |
11921 | 11019 | Substring_T hit; |
11922 | 11020 | Univcoord_T segment_left; |
11923 | 11021 | int nmismatches, j, i; |
11924 | int splice_pos, last_querypos; | |
11022 | int splice_pos, first_querypos, last_querypos; | |
11925 | 11023 | double prob; |
11926 | 11024 | |
11927 | 11025 | int nmismatches_left, nmismatches_right; |
11929 | 11027 | int sensedir; |
11930 | 11028 | |
11931 | 11029 | int splice_pos_start, splice_pos_end; |
11932 | ||
11933 | #ifdef HAVE_ALLOCA | |
11934 | int *mismatch_positions = (int *) ALLOCA((querylength+1)*sizeof(int)); | |
11935 | int *segment_donor_knownpos = (int *) ALLOCA((querylength+1)*sizeof(int)); | |
11936 | int *segment_acceptor_knownpos = (int *) ALLOCA((querylength+1)*sizeof(int)); | |
11937 | int *segment_antidonor_knownpos = (int *) ALLOCA((querylength+1)*sizeof(int)); | |
11938 | int *segment_antiacceptor_knownpos = (int *) ALLOCA((querylength+1)*sizeof(int)); | |
11939 | int *segment_donor_knowni = (int *) ALLOCA((querylength+1)*sizeof(int)); | |
11940 | int *segment_acceptor_knowni = (int *) ALLOCA((querylength+1)*sizeof(int)); | |
11941 | int *segment_antidonor_knowni = (int *) ALLOCA((querylength+1)*sizeof(int)); | |
11942 | int *segment_antiacceptor_knowni = (int *) ALLOCA((querylength+1)*sizeof(int)); | |
11943 | int *positions_alloc = (int *) ALLOCA((querylength+1)*sizeof(int)); | |
11944 | int *knowni_alloc = (int *) ALLOCA((querylength+1)*sizeof(int)); | |
11945 | #else | |
11946 | int mismatch_positions[MAX_READLENGTH+1]; | |
11947 | int segment_donor_knownpos[MAX_READLENGTH+1], segment_acceptor_knownpos[MAX_READLENGTH+1]; | |
11948 | int segment_antidonor_knownpos[MAX_READLENGTH+1], segment_antiacceptor_knownpos[MAX_READLENGTH+1]; | |
11949 | int segment_donor_knowni[MAX_READLENGTH+1], segment_acceptor_knowni[MAX_READLENGTH+1]; | |
11950 | int segment_antidonor_knowni[MAX_READLENGTH+1], segment_antiacceptor_knowni[MAX_READLENGTH+1]; | |
11951 | int positions_alloc[MAX_READLENGTH+1]; | |
11952 | int knowni_alloc[MAX_READLENGTH+1]; | |
11953 | #endif | |
11954 | ||
11955 | 11030 | int segment_donor_nknown, segment_acceptor_nknown, segment_antidonor_nknown, segment_antiacceptor_nknown; |
11956 | 11031 | int donori_nsites, acceptorj_nsites, antiacceptori_nsites, antidonorj_nsites; |
11957 | 11032 | int *donori_positions, *acceptorj_positions, *antiacceptori_positions, *antidonorj_positions; |
11958 | 11033 | int *donori_knowni, *acceptorj_knowni, *antiacceptori_knowni, *antidonorj_knowni; |
11959 | 11034 | |
11035 | int *mismatch_positions; | |
11036 | int *segment_donor_knownpos, *segment_acceptor_knownpos, *segment_antidonor_knownpos, *segment_antiacceptor_knownpos, | |
11037 | *segment_donor_knowni, *segment_acceptor_knowni, *segment_antidonor_knowni, *segment_antiacceptor_knowni; | |
11038 | int *positions_alloc, *knowni_alloc; | |
11039 | ||
11040 | #ifdef HAVE_ALLOCA | |
11041 | if (querylength <= MAX_STACK_READLENGTH) { | |
11042 | mismatch_positions = (int *) ALLOCA((querylength+1)*sizeof(int)); | |
11043 | segment_donor_knownpos = (int *) ALLOCA((querylength+1)*sizeof(int)); | |
11044 | segment_acceptor_knownpos = (int *) ALLOCA((querylength+1)*sizeof(int)); | |
11045 | segment_antidonor_knownpos = (int *) ALLOCA((querylength+1)*sizeof(int)); | |
11046 | segment_antiacceptor_knownpos = (int *) ALLOCA((querylength+1)*sizeof(int)); | |
11047 | segment_donor_knowni = (int *) ALLOCA((querylength+1)*sizeof(int)); | |
11048 | segment_acceptor_knowni = (int *) ALLOCA((querylength+1)*sizeof(int)); | |
11049 | segment_antidonor_knowni = (int *) ALLOCA((querylength+1)*sizeof(int)); | |
11050 | segment_antiacceptor_knowni = (int *) ALLOCA((querylength+1)*sizeof(int)); | |
11051 | positions_alloc = (int *) ALLOCA((querylength+1)*sizeof(int)); | |
11052 | knowni_alloc = (int *) ALLOCA((querylength+1)*sizeof(int)); | |
11053 | } else { | |
11054 | mismatch_positions = (int *) MALLOC((querylength+1)*sizeof(int)); | |
11055 | segment_donor_knownpos = (int *) MALLOC((querylength+1)*sizeof(int)); | |
11056 | segment_acceptor_knownpos = (int *) MALLOC((querylength+1)*sizeof(int)); | |
11057 | segment_antidonor_knownpos = (int *) MALLOC((querylength+1)*sizeof(int)); | |
11058 | segment_antiacceptor_knownpos = (int *) MALLOC((querylength+1)*sizeof(int)); | |
11059 | segment_donor_knowni = (int *) MALLOC((querylength+1)*sizeof(int)); | |
11060 | segment_acceptor_knowni = (int *) MALLOC((querylength+1)*sizeof(int)); | |
11061 | segment_antidonor_knowni = (int *) MALLOC((querylength+1)*sizeof(int)); | |
11062 | segment_antiacceptor_knowni = (int *) MALLOC((querylength+1)*sizeof(int)); | |
11063 | positions_alloc = (int *) MALLOC((querylength+1)*sizeof(int)); | |
11064 | knowni_alloc = (int *) MALLOC((querylength+1)*sizeof(int)); | |
11065 | } | |
11066 | #else | |
11067 | mismatch_positions = (int *) MALLOC((querylength+1)*sizeof(int)); | |
11068 | segment_donor_knownpos = (int *) MALLOC((querylength+1)*sizeof(int)); | |
11069 | segment_acceptor_knownpos = (int *) MALLOC((querylength+1)*sizeof(int)); | |
11070 | segment_antidonor_knownpos = (int *) MALLOC((querylength+1)*sizeof(int)); | |
11071 | segment_antiacceptor_knownpos = (int *) MALLOC((querylength+1)*sizeof(int)); | |
11072 | segment_donor_knowni = (int *) MALLOC((querylength+1)*sizeof(int)); | |
11073 | segment_acceptor_knowni = (int *) MALLOC((querylength+1)*sizeof(int)); | |
11074 | segment_antidonor_knowni = (int *) MALLOC((querylength+1)*sizeof(int)); | |
11075 | segment_antiacceptor_knowni = (int *) MALLOC((querylength+1)*sizeof(int)); | |
11076 | positions_alloc = (int *) MALLOC((querylength+1)*sizeof(int)); | |
11077 | knowni_alloc = (int *) MALLOC((querylength+1)*sizeof(int)); | |
11078 | #endif | |
11960 | 11079 | |
11961 | 11080 | debug4e(printf("Entering find_spliceends_distant_rna with %d anchor segments\n",nanchors)); |
11962 | 11081 | |
11969 | 11088 | assert(segment->diagonal != (Univcoord_T) -1); |
11970 | 11089 | |
11971 | 11090 | segment_left = segment->diagonal - querylength; /* FORMULA: Corresponds to querypos 0 */ |
11972 | last_querypos = segment->querypos3 + index1part; | |
11973 | assert(last_querypos <= querylength); | |
11091 | if ((first_querypos = segment->querypos5 - (index1interval - 1)) < 0) { | |
11092 | first_querypos = 0; | |
11093 | } | |
11094 | if ((last_querypos = segment->querypos3 + index1part + (index1interval - 1)) > querylength) { | |
11095 | last_querypos = querylength; | |
11096 | } | |
11974 | 11097 | |
11975 | 11098 | debug4e(printf("find_spliceends_distant_rna: Checking up to %d mismatches at diagonal %llu (querypos %d..%d) - querylength %d = %llu, floors %d and %d, plusp %d\n", |
11976 | 11099 | max_mismatches_allowed,(unsigned long long) segment->diagonal, |
11990 | 11113 | if (plusp) { |
11991 | 11114 | /* ? require that floors_from_neg3[segment->querypos5] <= max_mismatches_allowed */ |
11992 | 11115 | if (last_querypos < query_lastpos && |
11993 | (segment->querypos5 < index1part || segment->spliceable_low_p == true)) { | |
11116 | (first_querypos < index1part || segment->spliceable_low_p == true)) { | |
11994 | 11117 | /* genomic left anchor or middle anchor */ |
11995 | debug4e(printf("Searching genomic right: plus genomic left anchor or middle anchor\n")); | |
11118 | debug4e(printf("Searching genomic right: plus genomic left anchor or middle anchor: %d..%d\n", | |
11119 | segment->querypos5,querylength)); | |
11996 | 11120 | nmismatches_left = Genome_mismatches_left(mismatch_positions,max_mismatches_allowed, |
11997 | 11121 | query_compress,/*left*/segment_left, |
11998 | 11122 | /*pos5*/segment->querypos5,/*pos3*/querylength, |
12006 | 11130 | printf("\n"); |
12007 | 11131 | ); |
12008 | 11132 | |
12009 | splice_pos_start = segment->querypos5 + 1; | |
11133 | splice_pos_start = first_querypos + 1; | |
12010 | 11134 | if (nmismatches_left <= max_mismatches_allowed) { |
12011 | 11135 | splice_pos_end = querylength - 1 - 1; |
12012 | 11136 | } else if ((splice_pos_end = mismatch_positions[nmismatches_left-1]) > querylength - 1 - 1) { |
12022 | 11146 | |
12023 | 11147 | } else { |
12024 | 11148 | /* ? require that floors_to_pos3[segment->querypos3] <= max_mismatches_allowed */ |
12025 | if (segment->querypos5 > index1part && | |
11149 | if (first_querypos > index1part && | |
12026 | 11150 | (last_querypos > query_lastpos || segment->spliceable_low_p == true)) { |
12027 | 11151 | /* genomic left anchor or middle anchor */ |
12028 | debug4e(printf("Searching genomic right: minus genomic left anchor or middle anchor\n")); | |
11152 | debug4e(printf("Searching genomic right: minus genomic left anchor or middle anchor: %d..%d\n", | |
11153 | querylength - segment->querypos3 - index1part,querylength)); | |
12029 | 11154 | nmismatches_left = Genome_mismatches_left(mismatch_positions,max_mismatches_allowed, |
12030 | 11155 | query_compress,/*left*/segment_left, |
12031 | /*pos5*/querylength - last_querypos, | |
11156 | /*pos5*/querylength - segment->querypos3 - index1part, | |
12032 | 11157 | /*pos3*/querylength,plusp,genestrand); |
12033 | 11158 | debug4e( |
12034 | 11159 | printf("%d mismatches on left (%d allowed) at:", |
12119 | 11244 | (unsigned long long) segment_left,splice_pos,nmismatches,splice_pos_end)); |
12120 | 11245 | |
12121 | 11246 | if ((hit = Substring_new_donor(/*donor_coord*/segment_left + splice_pos,/*donor_knowni*/donori_knowni[i], |
12122 | splice_pos,/*substring_querystart*/segment->querypos5, | |
11247 | splice_pos,/*substring_querystart*/first_querypos, | |
12123 | 11248 | /*substring_queryend*/last_querypos, |
12124 | 11249 | nmismatches,/*prob*/2.0,/*left*/segment_left,query_compress, |
12125 | 11250 | querylength,plusp,genestrand, |
12127 | 11252 | segment->chrhigh,segment->chrlength)) != NULL) { |
12128 | 11253 | debug4e(printf("=> %s donor: %f at %d (%d mismatches) %d..%d\n", |
12129 | 11254 | plusp == true ? "plus" : "minus",Maxent_hr_donor_prob(segment_left + splice_pos,segment->chroffset), |
12130 | Substring_chimera_pos(hit),nmismatches,Substring_querystart(hit),Substring_queryend(hit))); | |
11255 | Substring_siteD_pos(hit),nmismatches,Substring_querystart(hit),Substring_queryend(hit))); | |
12131 | 11256 | debug4e(printf("q: %s\ng: %s\n",queryptr,gbuffer)); |
12132 | 11257 | (*distant_donors)[nmismatches] = List_push((*distant_donors)[nmismatches],(void *) hit); |
12133 | 11258 | } |
12140 | 11265 | debug4e(printf("Novel donor for segment at %llu, splice_pos %d (%d mismatches), stopi = %d\n", |
12141 | 11266 | (unsigned long long) segment_left,splice_pos,nmismatches,splice_pos_end)); |
12142 | 11267 | if ((hit = Substring_new_donor(/*donor_coord*/segment_left + splice_pos,/*donor_knowni*/-1, |
12143 | splice_pos,/*substring_querystart*/segment->querypos5, | |
12144 | /*substring_queryend*/last_querypos, | |
11268 | splice_pos,/*substring_querystart*/first_querypos, | |
11269 | /*substring_queryend, as last_querypos*/querylength, | |
12145 | 11270 | nmismatches,prob,/*left*/segment_left,query_compress, |
12146 | 11271 | querylength,plusp,genestrand, |
12147 | 11272 | sensedir,segment->chrnum,segment->chroffset, |
12148 | 11273 | segment->chrhigh,segment->chrlength)) != NULL) { |
12149 | 11274 | debug4e(printf("=> %s donor: %f at %d (%d mismatches) %d..%d\n", |
12150 | plusp == true ? "plus" : "minus",prob,Substring_chimera_pos(hit),nmismatches, | |
11275 | plusp == true ? "plus" : "minus",prob,Substring_siteD_pos(hit),nmismatches, | |
12151 | 11276 | Substring_querystart(hit),Substring_queryend(hit))); |
12152 | 11277 | debug4e(printf("q: %s\ng: %s\n",queryptr,gbuffer)); |
12153 | 11278 | (*distant_donors)[nmismatches] = List_push((*distant_donors)[nmismatches],(void *) hit); |
12202 | 11327 | debug4e(printf("Known antiacceptor for segment at %llu, splice_pos %d (%d mismatches), stopi = %d\n", |
12203 | 11328 | (unsigned long long) segment_left,splice_pos,nmismatches,splice_pos_end)); |
12204 | 11329 | if ((hit = Substring_new_acceptor(/*acceptor_coord*/segment_left + splice_pos,/*acceptor_knowni*/antiacceptori_knowni[i], |
12205 | splice_pos,/*substring_querystart*/segment->querypos5, | |
11330 | splice_pos,/*substring_querystart*/first_querypos, | |
12206 | 11331 | /*substring_queryend*/last_querypos, |
12207 | 11332 | nmismatches,/*prob*/2.0,/*left*/segment_left,query_compress, |
12208 | 11333 | querylength,plusp,genestrand, |
12210 | 11335 | segment->chrhigh,segment->chrlength)) != NULL) { |
12211 | 11336 | debug4e(printf("=> %s antiacceptor : %f at %d (%d mismatches) %d..%d\n", |
12212 | 11337 | plusp == true ? "plus" : "minus",Maxent_hr_antiacceptor_prob(segment_left + splice_pos,segment->chroffset), |
12213 | Substring_chimera_pos(hit),nmismatches,Substring_querystart(hit),Substring_queryend(hit))); | |
11338 | Substring_siteA_pos(hit),nmismatches,Substring_querystart(hit),Substring_queryend(hit))); | |
12214 | 11339 | debug4e(printf("q: %s\ng: %s\n",queryptr,gbuffer)); |
12215 | 11340 | (*distant_antiacceptors)[nmismatches] = List_push((*distant_antiacceptors)[nmismatches],(void *) hit); |
12216 | 11341 | } |
12223 | 11348 | debug4e(printf("Novel antiacceptor for segment at %llu, splice_pos %d (%d mismatches), stopi = %d\n", |
12224 | 11349 | (unsigned long long) segment_left,splice_pos,nmismatches,splice_pos_end)); |
12225 | 11350 | if ((hit = Substring_new_acceptor(/*acceptor_coord*/segment_left + splice_pos,/*acceptor_knowni*/-1, |
12226 | splice_pos,/*substring_querystart*/segment->querypos5, | |
11351 | splice_pos,/*substring_querystart*/first_querypos, | |
12227 | 11352 | /*substring_queryend*/last_querypos, |
12228 | 11353 | nmismatches,prob,/*left*/segment_left,query_compress, |
12229 | 11354 | querylength,plusp,genestrand, |
12230 | 11355 | sensedir,segment->chrnum,segment->chroffset, |
12231 | 11356 | segment->chrhigh,segment->chrlength)) != NULL) { |
12232 | 11357 | debug4e(printf("=> %s antiacceptor : %f at %d (%d mismatches) %d..%d\n", |
12233 | plusp == true ? "plus" : "minus",prob,Substring_chimera_pos(hit),nmismatches, | |
11358 | plusp == true ? "plus" : "minus",prob,Substring_siteA_pos(hit),nmismatches, | |
12234 | 11359 | Substring_querystart(hit),Substring_queryend(hit))); |
12235 | 11360 | debug4e(printf("q: %s\ng: %s\n",queryptr,gbuffer)); |
12236 | 11361 | (*distant_antiacceptors)[nmismatches] = List_push((*distant_antiacceptors)[nmismatches],(void *) hit); |
12247 | 11372 | /* Find splices on genomic left */ |
12248 | 11373 | if (plusp) { |
12249 | 11374 | /* ? require that floors_to_pos3[segment->querypos3] <= max_mismatches_allowed */ |
12250 | if (segment->querypos5 > index1part && | |
11375 | if (first_querypos > index1part && | |
12251 | 11376 | (last_querypos > query_lastpos || segment->spliceable_high_p == true)) { |
12252 | 11377 | /* genomic right anchor or middle anchor */ |
12253 | debug4e(printf("Searching genomic left: plus genomic right anchor or middle anchor\n")); | |
11378 | debug4e(printf("Searching genomic left: plus genomic right anchor or middle anchor: %d..%d\n", | |
11379 | 0,segment->querypos3 + index1part)); | |
12254 | 11380 | nmismatches_right = Genome_mismatches_right(mismatch_positions,max_mismatches_allowed, |
12255 | 11381 | query_compress,/*left*/segment_left, |
12256 | /*pos5*/0,/*pos3*/last_querypos, | |
11382 | /*pos5*/0,/*pos3*/segment->querypos3 + index1part, | |
12257 | 11383 | plusp,genestrand); |
12258 | 11384 | debug4e( |
12259 | 11385 | printf("%d mismatches on right (%d allowed) at:",nmismatches_right,max_mismatches_allowed); |
12280 | 11406 | } else { |
12281 | 11407 | /* ? require that floors_from_neg3[segment->querypos5] <= max_mismatches_allowed */ |
12282 | 11408 | if (last_querypos < query_lastpos && |
12283 | (segment->querypos5 < index1part || segment->spliceable_high_p == true)) { | |
11409 | (first_querypos < index1part || segment->spliceable_high_p == true)) { | |
12284 | 11410 | /* genomic right anchor or middle anchor*/ |
12285 | debug4e(printf("Searching genomic left: minus genomic right anchor or middle anchor\n")); | |
11411 | debug4e(printf("Searching genomic left: minus genomic right anchor or middle anchor: %d..%d\n", | |
11412 | 0,querylength - segment->querypos5)); | |
12286 | 11413 | nmismatches_right = Genome_mismatches_right(mismatch_positions,max_mismatches_allowed, |
12287 | 11414 | query_compress,/*left*/segment_left, |
12288 | 11415 | /*pos5*/0,/*pos3*/querylength - segment->querypos5, |
12295 | 11422 | printf("\n"); |
12296 | 11423 | ); |
12297 | 11424 | |
12298 | splice_pos_end = querylength - segment->querypos5 - 1 - 1; | |
11425 | splice_pos_end = querylength - first_querypos - 1 - 1; | |
12299 | 11426 | if (nmismatches_right <= max_mismatches_allowed) { |
12300 | 11427 | splice_pos_start = 1; |
12301 | 11428 | } else if ((splice_pos_start = mismatch_positions[nmismatches_right-1]) < 1) { |
12375 | 11502 | debug4e(printf("Known acceptor for segment at %llu, splice_pos %d (%d mismatches), stopi = %d\n", |
12376 | 11503 | (unsigned long long) segment_left,splice_pos,nmismatches,splice_pos_start)); |
12377 | 11504 | if ((hit = Substring_new_acceptor(/*acceptor_coord*/segment_left + splice_pos,/*acceptor_knowni*/acceptorj_knowni[i], |
12378 | splice_pos,/*substring_querystart*/segment->querypos5, | |
11505 | splice_pos,/*substring_querystart*/first_querypos, | |
12379 | 11506 | /*substring_queryend*/last_querypos, |
12380 | 11507 | nmismatches,/*prob*/2.0,/*left*/segment_left,query_compress, |
12381 | 11508 | querylength,plusp,genestrand, |
12383 | 11510 | segment->chrhigh,segment->chrlength)) != NULL) { |
12384 | 11511 | debug4e(printf("=> %s acceptor: %f at %d (%d mismatches) %d..%d\n", |
12385 | 11512 | plusp == true ? "plus" : "minus",Maxent_hr_acceptor_prob(segment_left + splice_pos,segment->chroffset), |
12386 | Substring_chimera_pos(hit),nmismatches,Substring_querystart(hit),Substring_queryend(hit))); | |
11513 | Substring_siteA_pos(hit),nmismatches,Substring_querystart(hit),Substring_queryend(hit))); | |
12387 | 11514 | debug4e(printf("q: %s\ng: %s\n",queryptr,gbuffer)); |
12388 | 11515 | (*distant_acceptors)[nmismatches] = List_push((*distant_acceptors)[nmismatches],(void *) hit); |
12389 | 11516 | } |
12396 | 11523 | debug4e(printf("Novel acceptor for segment at %llu, splice_pos %d (%d mismatches), stopi = %d\n", |
12397 | 11524 | (unsigned long long) segment_left,splice_pos,nmismatches,splice_pos_start)); |
12398 | 11525 | if ((hit = Substring_new_acceptor(/*acceptor_coord*/segment_left + splice_pos,/*acceptor_knowni*/-1, |
12399 | splice_pos,/*substring_querystart*/segment->querypos5, | |
11526 | splice_pos,/*substring_querystart*/first_querypos, | |
12400 | 11527 | /*substring_queryend*/last_querypos, |
12401 | 11528 | nmismatches,prob,/*left*/segment_left,query_compress, |
12402 | 11529 | querylength,plusp,genestrand, |
12403 | 11530 | sensedir,segment->chrnum,segment->chroffset, |
12404 | 11531 | segment->chrhigh,segment->chrlength)) != NULL) { |
12405 | 11532 | debug4e(printf("=> %s acceptor: %f at %d (%d mismatches) %d..%d\n", |
12406 | plusp == true ? "plus" : "minus",prob,Substring_chimera_pos(hit),nmismatches, | |
11533 | plusp == true ? "plus" : "minus",prob,Substring_siteA_pos(hit),nmismatches, | |
12407 | 11534 | Substring_querystart(hit),Substring_queryend(hit))); |
12408 | 11535 | debug4e(printf("q: %s\ng: %s\n",queryptr,gbuffer)); |
12409 | 11536 | (*distant_acceptors)[nmismatches] = List_push((*distant_acceptors)[nmismatches],(void *) hit); |
12458 | 11585 | debug4e(printf("Known antidonor for segmenti at %llu, splice_pos %d (%d mismatches), stopi = %d\n", |
12459 | 11586 | (unsigned long long) segment_left,splice_pos,nmismatches,splice_pos_start)); |
12460 | 11587 | if ((hit = Substring_new_donor(/*donor_coord*/segment_left + splice_pos,/*donor_knowni*/antidonorj_knowni[i], |
12461 | splice_pos,/*substring_querystart*/segment->querypos5, | |
11588 | splice_pos,/*substring_querystart*/first_querypos, | |
12462 | 11589 | /*substring_queryend*/last_querypos, |
12463 | 11590 | nmismatches,/*prob*/2.0,/*left*/segment_left,query_compress, |
12464 | 11591 | querylength,plusp,genestrand, |
12466 | 11593 | segment->chrhigh,segment->chrlength)) != NULL) { |
12467 | 11594 | debug4e(printf("=> %s antidonor: %f at %d (%d mismatches) %d..%d\n", |
12468 | 11595 | plusp == true ? "plus" : "minus",Maxent_hr_antidonor_prob(segment_left + splice_pos,segment->chroffset), |
12469 | Substring_chimera_pos(hit),nmismatches,Substring_querystart(hit),Substring_queryend(hit))); | |
11596 | Substring_siteD_pos(hit),nmismatches,Substring_querystart(hit),Substring_queryend(hit))); | |
12470 | 11597 | debug4e(printf("q: %s\ng: %s\n",queryptr,gbuffer)); |
12471 | 11598 | (*distant_antidonors)[nmismatches] = List_push((*distant_antidonors)[nmismatches],(void *) hit); |
12472 | 11599 | } |
12479 | 11606 | debug4e(printf("Novel antidonor for segmenti at %llu, splice_pos %d (%d mismatches), stopi = %d\n", |
12480 | 11607 | (unsigned long long) segment_left,splice_pos,nmismatches,splice_pos_start)); |
12481 | 11608 | if ((hit = Substring_new_donor(/*donor_coord*/segment_left + splice_pos,/*donor_knowni*/-1, |
12482 | splice_pos,/*substring_querystart*/segment->querypos5, | |
11609 | splice_pos,/*substring_querystart*/first_querypos, | |
12483 | 11610 | /*substring_queryend*/last_querypos, |
12484 | 11611 | nmismatches,prob,/*left*/segment_left,query_compress, |
12485 | 11612 | querylength,plusp,genestrand, |
12486 | 11613 | sensedir,segment->chrnum,segment->chroffset, |
12487 | 11614 | segment->chrhigh,segment->chrlength)) != NULL) { |
12488 | 11615 | debug4e(printf("=> %s antidonor: %f at %d (%d mismatches) %d..%d\n", |
12489 | plusp == true ? "plus" : "minus",prob,Substring_chimera_pos(hit),nmismatches, | |
11616 | plusp == true ? "plus" : "minus",prob,Substring_siteD_pos(hit),nmismatches, | |
12490 | 11617 | Substring_querystart(hit),Substring_queryend(hit))); |
12491 | 11618 | debug4e(printf("q: %s\ng: %s\n",queryptr,gbuffer)); |
12492 | 11619 | (*distant_antidonors)[nmismatches] = List_push((*distant_antidonors)[nmismatches],(void *) hit); |
12500 | 11627 | } |
12501 | 11628 | } |
12502 | 11629 | } |
11630 | ||
11631 | #ifdef HAVE_ALLOCA | |
11632 | if (querylength <= MAX_STACK_READLENGTH) { | |
11633 | FREEA(mismatch_positions); | |
11634 | FREEA(segment_donor_knownpos); | |
11635 | FREEA(segment_acceptor_knownpos); | |
11636 | FREEA(segment_antidonor_knownpos); | |
11637 | FREEA(segment_antiacceptor_knownpos); | |
11638 | FREEA(segment_donor_knowni); | |
11639 | FREEA(segment_acceptor_knowni); | |
11640 | FREEA(segment_antidonor_knowni); | |
11641 | FREEA(segment_antiacceptor_knowni); | |
11642 | FREEA(positions_alloc); | |
11643 | FREEA(knowni_alloc); | |
11644 | } else { | |
11645 | FREE(mismatch_positions); | |
11646 | FREE(segment_donor_knownpos); | |
11647 | FREE(segment_acceptor_knownpos); | |
11648 | FREE(segment_antidonor_knownpos); | |
11649 | FREE(segment_antiacceptor_knownpos); | |
11650 | FREE(segment_donor_knowni); | |
11651 | FREE(segment_acceptor_knowni); | |
11652 | FREE(segment_antidonor_knowni); | |
11653 | FREE(segment_antiacceptor_knowni); | |
11654 | FREE(positions_alloc); | |
11655 | FREE(knowni_alloc); | |
11656 | } | |
11657 | #else | |
11658 | FREE(mismatch_positions); | |
11659 | FREE(segment_donor_knownpos); | |
11660 | FREE(segment_acceptor_knownpos); | |
11661 | FREE(segment_antidonor_knownpos); | |
11662 | FREE(segment_antiacceptor_knownpos); | |
11663 | FREE(segment_donor_knowni); | |
11664 | FREE(segment_acceptor_knowni); | |
11665 | FREE(segment_antidonor_knowni); | |
11666 | FREE(segment_antiacceptor_knowni); | |
11667 | FREE(positions_alloc); | |
11668 | FREE(knowni_alloc); | |
11669 | #endif | |
12503 | 11670 | |
12504 | 11671 | return; |
12505 | 11672 | } |
12524 | 11691 | Univcoord_T segment_left; |
12525 | 11692 | int nmismatches_left, nmismatches_right; |
12526 | 11693 | Endtype_T start_endtype, end_endtype; |
12527 | ||
12528 | #ifdef HAVE_ALLOCA | |
12529 | int *mismatch_positions = (int *) ALLOCA((querylength+1)*sizeof(int)); | |
12530 | #else | |
12531 | int mismatch_positions[MAX_READLENGTH+1]; | |
12532 | #endif | |
12533 | ||
12534 | 11694 | /* int *floors_from_neg3, *floors_to_pos3; */ |
12535 | 11695 | int max_terminal_length; |
12536 | 11696 | int nterminals_left, nterminals_right, nterminals_middle; |
12538 | 11698 | #ifdef DEBUG4T |
12539 | 11699 | int i; |
12540 | 11700 | #endif |
11701 | ||
11702 | int *mismatch_positions; | |
11703 | ||
11704 | #ifdef HAVE_ALLOCA | |
11705 | if (querylength <= MAX_STACK_READLENGTH) { | |
11706 | mismatch_positions = (int *) ALLOCA((querylength+1)*sizeof(int)); | |
11707 | } else { | |
11708 | mismatch_positions = (int *) MALLOC((querylength+1)*sizeof(int)); | |
11709 | } | |
11710 | #else | |
11711 | mismatch_positions = (int *) MALLOC((querylength+1)*sizeof(int)); | |
11712 | #endif | |
11713 | ||
12541 | 11714 | |
12542 | 11715 | debug(printf("identify_terminals: Checking up to %d mismatches\n",max_mismatches_allowed)); |
12543 | 11716 | |
12980 | 12153 | minus_terminals_right = (List_T) NULL; |
12981 | 12154 | } |
12982 | 12155 | |
12156 | #ifdef HAVE_ALLOCA | |
12157 | if (querylength <= MAX_STACK_READLENGTH) { | |
12158 | FREEA(mismatch_positions); | |
12159 | } else { | |
12160 | FREE(mismatch_positions); | |
12161 | } | |
12162 | #else | |
12163 | FREE(mismatch_positions); | |
12164 | #endif | |
12165 | ||
12983 | 12166 | return List_append(plus_terminals_middle, |
12984 | 12167 | List_append(plus_terminals_left, |
12985 | 12168 | List_append(plus_terminals_right, |
13161 | 12344 | intragenic_splice_p (Chrpos_T splicedistance, Substring_T donor, Substring_T acceptor) { |
13162 | 12345 | int knowni; |
13163 | 12346 | |
13164 | if ((knowni = Substring_splicesites_knowni(donor)) >= 0) { | |
12347 | if ((knowni = Substring_splicesitesD_knowni(donor)) >= 0) { | |
13165 | 12348 | if (splicedists[knowni] >= splicedistance) { |
13166 | 12349 | return true; |
13167 | 12350 | } |
13168 | 12351 | } |
13169 | 12352 | |
13170 | if ((knowni = Substring_splicesites_knowni(acceptor)) >= 0) { | |
12353 | if ((knowni = Substring_splicesitesA_knowni(acceptor)) >= 0) { | |
13171 | 12354 | if (splicedists[knowni] >= splicedistance) { |
13172 | 12355 | return true; |
13173 | 12356 | } |
13238 | 12421 | (unsigned long long) Substring_genomicstart(endfrag), |
13239 | 12422 | Substring_querystart(endfrag),Substring_queryend(endfrag))); |
13240 | 12423 | |
13241 | if ((pos = Substring_chimera_pos(startfrag)) < min_endlength_1) { | |
12424 | if ((pos = Substring_siteN_pos(startfrag)) < min_endlength_1) { | |
13242 | 12425 | debug4ld(printf("chimera_pos of startfrag < min_endlength_1\n")); |
13243 | 12426 | p = p->rest; |
13244 | 12427 | } else if (pos > querylength - min_endlength_2) { |
13245 | 12428 | debug4ld(printf("chimera_pos of startfrag > querylength - min_endlength_2\n")); |
13246 | 12429 | p = p->rest; |
13247 | } else if (pos < Substring_chimera_pos(endfrag)) { | |
13248 | debug4ld(printf("chimera_pos of startfrag %d < chimera_pos of endfrag %d\n",pos,Substring_chimera_pos(endfrag))); | |
12430 | } else if (pos < Substring_siteN_pos(endfrag)) { | |
12431 | debug4ld(printf("chimera_pos of startfrag %d < chimera_pos of endfrag %d\n",pos,Substring_siteN_pos(endfrag))); | |
13249 | 12432 | p = p->rest; |
13250 | } else if (pos > Substring_chimera_pos(endfrag)) { | |
13251 | debug4ld(printf("chimera_pos of startfrag %d > chimera_pos of endfrag %d\n",pos,Substring_chimera_pos(endfrag))); | |
12433 | } else if (pos > Substring_siteN_pos(endfrag)) { | |
12434 | debug4ld(printf("chimera_pos of startfrag %d > chimera_pos of endfrag %d\n",pos,Substring_siteN_pos(endfrag))); | |
13252 | 12435 | q = q->rest; |
13253 | 12436 | } else { |
13254 | 12437 | /* Generate all pairs at this splice_pos */ |
13255 | 12438 | qsave = q; |
13256 | while (p != NULL /* && *nsplicepairs <= MAXCHIMERAPATHS */ && Substring_chimera_pos(((Substring_T) p->first)) == pos) { | |
12439 | while (p != NULL /* && *nsplicepairs <= MAXCHIMERAPATHS */ && Substring_siteN_pos(((Substring_T) p->first)) == pos) { | |
13257 | 12440 | startfrag = (Substring_T) p->first; |
13258 | 12441 | debug4ld(printf("startfrag at %llu, pos %d\n",(unsigned long long) Substring_genomicstart(startfrag),pos)); |
13259 | 12442 | q = qsave; |
13260 | while (q != NULL /* && *nsplicepairs <= MAXCHIMERAPATHS */ && Substring_chimera_pos(((Substring_T) q->first)) == pos) { | |
12443 | while (q != NULL /* && *nsplicepairs <= MAXCHIMERAPATHS */ && Substring_siteN_pos(((Substring_T) q->first)) == pos) { | |
13261 | 12444 | endfrag = (Substring_T) q->first; |
13262 | 12445 | debug4ld(printf("endfrag at %llu, pos %d\n",(unsigned long long) Substring_genomicstart(endfrag),pos)); |
13263 | 12446 | if (Substring_genomicstart(endfrag) == Substring_genomicstart(startfrag)) { |
13333 | 12516 | (unsigned long long) Substring_genomicstart(endfrag), |
13334 | 12517 | Substring_querystart(endfrag),Substring_queryend(endfrag))); |
13335 | 12518 | |
13336 | if ((pos = Substring_chimera_pos(startfrag)) < min_endlength_1) { | |
12519 | if ((pos = Substring_siteN_pos(startfrag)) < min_endlength_1) { | |
13337 | 12520 | debug4ld(printf("chimera_pos of startfrag < min_endlength_1\n")); |
13338 | 12521 | p = p->rest; |
13339 | 12522 | } else if (pos > querylength - min_endlength_2) { |
13340 | 12523 | debug4ld(printf("chimera_pos of startfrag > querylength - min_endlength_2\n")); |
13341 | 12524 | p = p->rest; |
13342 | } else if (pos < Substring_chimera_pos(endfrag)) { | |
13343 | debug4ld(printf("chimera_pos of startfrag %d < chimera_pos of endfrag %d\n",pos,Substring_chimera_pos(endfrag))); | |
12525 | } else if (pos < Substring_siteN_pos(endfrag)) { | |
12526 | debug4ld(printf("chimera_pos of startfrag %d < chimera_pos of endfrag %d\n",pos,Substring_siteN_pos(endfrag))); | |
13344 | 12527 | p = p->rest; |
13345 | } else if (pos > Substring_chimera_pos(endfrag)) { | |
13346 | debug4ld(printf("chimera_pos of startfrag %d > chimera_pos of endfrag %d\n",pos,Substring_chimera_pos(endfrag))); | |
12528 | } else if (pos > Substring_siteN_pos(endfrag)) { | |
12529 | debug4ld(printf("chimera_pos of startfrag %d > chimera_pos of endfrag %d\n",pos,Substring_siteN_pos(endfrag))); | |
13347 | 12530 | q = q->rest; |
13348 | 12531 | } else { |
13349 | 12532 | qsave = q; |
13350 | while (p != NULL /* && *nsplicepairs <= MAXCHIMERAPATHS */ && Substring_chimera_pos(((Substring_T) p->first)) == pos) { | |
12533 | while (p != NULL /* && *nsplicepairs <= MAXCHIMERAPATHS */ && Substring_siteN_pos(((Substring_T) p->first)) == pos) { | |
13351 | 12534 | startfrag = (Substring_T) p->first; |
13352 | 12535 | debug4ld(printf("startfrag at %llu, pos %d\n",(unsigned long long) Substring_genomicstart(startfrag),pos)); |
13353 | 12536 | q = qsave; |
13354 | while (q != NULL /* && *nsplicepairs <= MAXCHIMERAPATHS */ && Substring_chimera_pos(((Substring_T) q->first)) == pos) { | |
12537 | while (q != NULL /* && *nsplicepairs <= MAXCHIMERAPATHS */ && Substring_siteN_pos(((Substring_T) q->first)) == pos) { | |
13355 | 12538 | endfrag = (Substring_T) q->first; |
13356 | 12539 | debug4ld(printf("endfrag at %llu, pos %d\n",(unsigned long long) Substring_genomicstart(endfrag),pos)); |
13357 | 12540 | if (Substring_genomicstart(endfrag) == Substring_genomicstart(startfrag)) { |
13433 | 12616 | (unsigned long long) Substring_genomicstart(endfrag), |
13434 | 12617 | Substring_querystart(endfrag),Substring_queryend(endfrag))); |
13435 | 12618 | |
13436 | if ((pos = Substring_chimera_pos(startfrag)) < min_endlength_1) { | |
12619 | if ((pos = Substring_siteN_pos(startfrag)) < min_endlength_1) { | |
13437 | 12620 | debug4ld(printf("chimera_pos of startfrag < min_endlength_1\n")); |
13438 | 12621 | p = p->rest; |
13439 | 12622 | } else if (pos > querylength - min_endlength_2) { |
13440 | 12623 | debug4ld(printf("chimera_pos of startfrag > querylength - min_endlength_2\n")); |
13441 | 12624 | p = p->rest; |
13442 | } else if (pos < Substring_chimera_pos(endfrag)) { | |
13443 | debug4ld(printf("chimera_pos of startfrag %d < chimera_pos of endfrag %d\n",pos,Substring_chimera_pos(endfrag))); | |
12625 | } else if (pos < Substring_siteN_pos(endfrag)) { | |
12626 | debug4ld(printf("chimera_pos of startfrag %d < chimera_pos of endfrag %d\n",pos,Substring_siteN_pos(endfrag))); | |
13444 | 12627 | p = p->rest; |
13445 | } else if (pos > Substring_chimera_pos(endfrag)) { | |
13446 | debug4ld(printf("chimera_pos of startfrag %d > chimera_pos of endfrag %d\n",pos,Substring_chimera_pos(endfrag))); | |
12628 | } else if (pos > Substring_siteN_pos(endfrag)) { | |
12629 | debug4ld(printf("chimera_pos of startfrag %d > chimera_pos of endfrag %d\n",pos,Substring_siteN_pos(endfrag))); | |
13447 | 12630 | q = q->rest; |
13448 | 12631 | } else { |
13449 | 12632 | qsave = q; |
13450 | while (p != NULL && *ndistantsplicepairs <= MAXCHIMERAPATHS && Substring_chimera_pos(((Substring_T) p->first)) == pos) { | |
12633 | while (p != NULL && *ndistantsplicepairs <= MAXCHIMERAPATHS && Substring_siteN_pos(((Substring_T) p->first)) == pos) { | |
13451 | 12634 | startfrag = (Substring_T) p->first; |
13452 | 12635 | debug4ld(printf("startfrag at %llu, pos %d\n",(unsigned long long) Substring_genomicstart(startfrag),pos)); |
13453 | 12636 | q = qsave; |
13454 | while (q != NULL && *ndistantsplicepairs <= MAXCHIMERAPATHS && Substring_chimera_pos(((Substring_T) q->first)) == pos) { | |
12637 | while (q != NULL && *ndistantsplicepairs <= MAXCHIMERAPATHS && Substring_siteN_pos(((Substring_T) q->first)) == pos) { | |
13455 | 12638 | endfrag = (Substring_T) q->first; |
13456 | 12639 | debug4ld(printf("endfrag at %llu, pos %d\n",(unsigned long long) Substring_genomicstart(endfrag),pos)); |
13457 | 12640 | if (Substring_chrnum(startfrag) != Substring_chrnum(endfrag)) { |
13497 | 12680 | (unsigned long long) Substring_genomicstart(endfrag), |
13498 | 12681 | Substring_querystart(endfrag),Substring_queryend(endfrag))); |
13499 | 12682 | |
13500 | if ((pos = Substring_chimera_pos(startfrag)) < min_endlength_1) { | |
12683 | if ((pos = Substring_siteN_pos(startfrag)) < min_endlength_1) { | |
13501 | 12684 | debug4ld(printf("chimera_pos of startfrag < min_endlength_1\n")); |
13502 | 12685 | p = p->rest; |
13503 | 12686 | } else if (pos > querylength - min_endlength_2) { |
13504 | 12687 | debug4ld(printf("chimera_pos of startfrag > querylength - min_endlength_2\n")); |
13505 | 12688 | p = p->rest; |
13506 | } else if (pos < Substring_chimera_pos(endfrag)) { | |
13507 | debug4ld(printf("chimera_pos of startfrag %d < chimera_pos of endfrag %d\n",pos,Substring_chimera_pos(endfrag))); | |
12689 | } else if (pos < Substring_siteN_pos(endfrag)) { | |
12690 | debug4ld(printf("chimera_pos of startfrag %d < chimera_pos of endfrag %d\n",pos,Substring_siteN_pos(endfrag))); | |
13508 | 12691 | p = p->rest; |
13509 | } else if (pos > Substring_chimera_pos(endfrag)) { | |
13510 | debug4ld(printf("chimera_pos of startfrag %d > chimera_pos of endfrag %d\n",pos,Substring_chimera_pos(endfrag))); | |
12692 | } else if (pos > Substring_siteN_pos(endfrag)) { | |
12693 | debug4ld(printf("chimera_pos of startfrag %d > chimera_pos of endfrag %d\n",pos,Substring_siteN_pos(endfrag))); | |
13511 | 12694 | q = q->rest; |
13512 | 12695 | } else { |
13513 | 12696 | qsave = q; |
13514 | while (p != NULL && *ndistantsplicepairs <= MAXCHIMERAPATHS && Substring_chimera_pos(((Substring_T) p->first)) == pos) { | |
12697 | while (p != NULL && *ndistantsplicepairs <= MAXCHIMERAPATHS && Substring_siteN_pos(((Substring_T) p->first)) == pos) { | |
13515 | 12698 | startfrag = (Substring_T) p->first; |
13516 | 12699 | debug4ld(printf("startfrag at %llu, pos %d\n",(unsigned long long) Substring_genomicstart(startfrag),pos)); |
13517 | 12700 | q = qsave; |
13518 | while (q != NULL && *ndistantsplicepairs <= MAXCHIMERAPATHS && Substring_chimera_pos(((Substring_T) q->first)) == pos) { | |
12701 | while (q != NULL && *ndistantsplicepairs <= MAXCHIMERAPATHS && Substring_siteN_pos(((Substring_T) q->first)) == pos) { | |
13519 | 12702 | endfrag = (Substring_T) q->first; |
13520 | 12703 | debug4ld(printf("endfrag at %llu, pos %d\n",(unsigned long long) Substring_genomicstart(endfrag),pos)); |
13521 | 12704 | if (Substring_chrnum(startfrag) != Substring_chrnum(endfrag)) { |
13625 | 12808 | (unsigned long long) Substring_genomicstart(acceptor), |
13626 | 12809 | Substring_querystart(acceptor),Substring_queryend(acceptor))); |
13627 | 12810 | |
13628 | if ((pos = Substring_chimera_pos(donor)) < min_endlength_1) { | |
12811 | if ((pos = Substring_siteD_pos(donor)) < min_endlength_1) { | |
13629 | 12812 | debug4ld(printf("chimera_pos of donor < min_endlength_1\n")); |
13630 | 12813 | p = p->rest; |
13631 | 12814 | } else if (pos > querylength - min_endlength_2) { |
13632 | 12815 | debug4ld(printf("chimera_pos of donor > querylength - min_endlength_2\n")); |
13633 | 12816 | p = p->rest; |
13634 | } else if (pos < Substring_chimera_pos(acceptor)) { | |
13635 | debug4ld(printf("chimera_pos of donor %d < chimera_pos of acceptor %d\n",pos,Substring_chimera_pos(acceptor))); | |
12817 | } else if (pos < Substring_siteA_pos(acceptor)) { | |
12818 | debug4ld(printf("chimera_pos of donor %d < chimera_pos of acceptor %d\n",pos,Substring_siteA_pos(acceptor))); | |
13636 | 12819 | p = p->rest; |
13637 | } else if (pos > Substring_chimera_pos(acceptor)) { | |
13638 | debug4ld(printf("chimera_pos of donor %d > chimera_pos of acceptor %d\n",pos,Substring_chimera_pos(acceptor))); | |
12820 | } else if (pos > Substring_siteA_pos(acceptor)) { | |
12821 | debug4ld(printf("chimera_pos of donor %d > chimera_pos of acceptor %d\n",pos,Substring_siteA_pos(acceptor))); | |
13639 | 12822 | q = q->rest; |
13640 | 12823 | } else { |
13641 | 12824 | /* Generate all pairs at this splice_pos */ |
13642 | 12825 | qsave = q; |
13643 | while (p != NULL /* && *nsplicepairs <= MAXCHIMERAPATHS */ && Substring_chimera_pos(((Substring_T) p->first)) == pos) { | |
12826 | while (p != NULL /* && *nsplicepairs <= MAXCHIMERAPATHS */ && Substring_siteD_pos(((Substring_T) p->first)) == pos) { | |
13644 | 12827 | donor = (Substring_T) p->first; |
13645 | 12828 | debug4ld(printf("donor at %llu, pos %d\n",(unsigned long long) Substring_genomicstart(donor),pos)); |
13646 | 12829 | q = qsave; |
13647 | while (q != NULL /* && *nsplicepairs <= MAXCHIMERAPATHS */ && Substring_chimera_pos(((Substring_T) q->first)) == pos) { | |
12830 | while (q != NULL /* && *nsplicepairs <= MAXCHIMERAPATHS */ && Substring_siteA_pos(((Substring_T) q->first)) == pos) { | |
13648 | 12831 | acceptor = (Substring_T) q->first; |
13649 | 12832 | debug4ld(printf("acceptor at %llu, pos %d\n",(unsigned long long) Substring_genomicstart(acceptor),pos)); |
13650 | 12833 | if (Substring_genomicstart(acceptor) == Substring_genomicstart(donor)) { |
13675 | 12858 | if (shortdistancep) { |
13676 | 12859 | *localsplicing = List_push(*localsplicing, |
13677 | 12860 | (void *) Stage3end_new_splice(&(*found_score),nmismatches1,nmismatches2, |
13678 | donor,acceptor,Substring_chimera_prob(donor),Substring_chimera_prob(acceptor),distance, | |
12861 | donor,acceptor,Substring_siteD_prob(donor),Substring_siteA_prob(acceptor),distance, | |
13679 | 12862 | /*shortdistancep*/true,localsplicing_penalty,querylength, |
13680 | 12863 | /*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL, |
13681 | 12864 | /*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL, |
13686 | 12869 | } else if (*ndistantsplicepairs <= MAXCHIMERAPATHS) { |
13687 | 12870 | distantsplicing = List_push(distantsplicing, |
13688 | 12871 | (void *) Stage3end_new_splice(&(*found_score),nmismatches1,nmismatches2, |
13689 | donor,acceptor,Substring_chimera_prob(donor),Substring_chimera_prob(acceptor),distance, | |
12872 | donor,acceptor,Substring_siteD_prob(donor),Substring_siteA_prob(acceptor),distance, | |
13690 | 12873 | /*shortdistancep*/false,distantsplicing_penalty,querylength, |
13691 | 12874 | /*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL, |
13692 | 12875 | /*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL, |
13720 | 12903 | (unsigned long long) Substring_genomicstart(acceptor), |
13721 | 12904 | Substring_querystart(acceptor),Substring_queryend(acceptor))); |
13722 | 12905 | |
13723 | if ((pos = Substring_chimera_pos(donor)) < min_endlength_1) { | |
12906 | if ((pos = Substring_siteD_pos(donor)) < min_endlength_1) { | |
13724 | 12907 | debug4ld(printf("chimera_pos of donor < min_endlength_1\n")); |
13725 | 12908 | p = p->rest; |
13726 | 12909 | } else if (pos > querylength - min_endlength_2) { |
13727 | 12910 | debug4ld(printf("chimera_pos of donor > querylength - min_endlength_2\n")); |
13728 | 12911 | p = p->rest; |
13729 | } else if (pos < Substring_chimera_pos(acceptor)) { | |
13730 | debug4ld(printf("chimera_pos of donor %d < chimera_pos of acceptor %d\n",pos,Substring_chimera_pos(acceptor))); | |
12912 | } else if (pos < Substring_siteA_pos(acceptor)) { | |
12913 | debug4ld(printf("chimera_pos of donor %d < chimera_pos of acceptor %d\n",pos,Substring_siteA_pos(acceptor))); | |
13731 | 12914 | p = p->rest; |
13732 | } else if (pos > Substring_chimera_pos(acceptor)) { | |
13733 | debug4ld(printf("chimera_pos of donor %d > chimera_pos of acceptor %d\n",pos,Substring_chimera_pos(acceptor))); | |
12915 | } else if (pos > Substring_siteA_pos(acceptor)) { | |
12916 | debug4ld(printf("chimera_pos of donor %d > chimera_pos of acceptor %d\n",pos,Substring_siteA_pos(acceptor))); | |
13734 | 12917 | q = q->rest; |
13735 | 12918 | } else { |
13736 | 12919 | qsave = q; |
13737 | while (p != NULL /* && *nsplicepairs <= MAXCHIMERAPATHS */ && Substring_chimera_pos(((Substring_T) p->first)) == pos) { | |
12920 | while (p != NULL /* && *nsplicepairs <= MAXCHIMERAPATHS */ && Substring_siteD_pos(((Substring_T) p->first)) == pos) { | |
13738 | 12921 | donor = (Substring_T) p->first; |
13739 | 12922 | debug4ld(printf("donor at %llu, pos %d\n",(unsigned long long) Substring_genomicstart(donor),pos)); |
13740 | 12923 | q = qsave; |
13741 | while (q != NULL /* && *nsplicepairs <= MAXCHIMERAPATHS */ && Substring_chimera_pos(((Substring_T) q->first)) == pos) { | |
12924 | while (q != NULL /* && *nsplicepairs <= MAXCHIMERAPATHS */ && Substring_siteA_pos(((Substring_T) q->first)) == pos) { | |
13742 | 12925 | acceptor = (Substring_T) q->first; |
13743 | 12926 | debug4ld(printf("acceptor at %llu, pos %d\n",(unsigned long long) Substring_genomicstart(acceptor),pos)); |
13744 | 12927 | if (Substring_genomicstart(acceptor) == Substring_genomicstart(donor)) { |
13768 | 12951 | if (shortdistancep) { |
13769 | 12952 | *localsplicing = List_push(*localsplicing, |
13770 | 12953 | (void *) Stage3end_new_splice(&(*found_score),nmismatches1,nmismatches2, |
13771 | donor,acceptor,Substring_chimera_prob(donor),Substring_chimera_prob(acceptor),distance, | |
12954 | donor,acceptor,Substring_siteD_prob(donor),Substring_siteA_prob(acceptor),distance, | |
13772 | 12955 | /*shortdistancep*/true,localsplicing_penalty,querylength, |
13773 | 12956 | /*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL, |
13774 | 12957 | /*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL, |
13779 | 12962 | } else if (*ndistantsplicepairs <= MAXCHIMERAPATHS) { |
13780 | 12963 | distantsplicing = List_push(distantsplicing, |
13781 | 12964 | (void *) Stage3end_new_splice(&(*found_score),nmismatches1,nmismatches2, |
13782 | donor,acceptor,Substring_chimera_prob(donor),Substring_chimera_prob(acceptor),distance, | |
12965 | donor,acceptor,Substring_siteD_prob(donor),Substring_siteA_prob(acceptor),distance, | |
13783 | 12966 | /*shortdistancep*/false,distantsplicing_penalty,querylength, |
13784 | 12967 | /*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL, |
13785 | 12968 | /*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL, |
13812 | 12995 | (unsigned long long) Substring_genomicstart(acceptor), |
13813 | 12996 | Substring_querystart(acceptor),Substring_queryend(acceptor))); |
13814 | 12997 | |
13815 | if ((pos = Substring_chimera_pos(donor)) < min_endlength_2) { | |
12998 | if ((pos = Substring_siteD_pos(donor)) < min_endlength_2) { | |
13816 | 12999 | debug4ld(printf("chimera_pos of donor < min_endlength_2\n")); |
13817 | 13000 | p = p->rest; |
13818 | 13001 | } else if (pos > querylength - min_endlength_1) { |
13819 | 13002 | debug4ld(printf("chimera_pos of donor > querylength - min_endlength_1\n")); |
13820 | 13003 | p = p->rest; |
13821 | } else if (pos < Substring_chimera_pos(acceptor)) { | |
13822 | debug4ld(printf("chimera_pos of donor %d < chimera_pos of acceptor %d\n",pos,Substring_chimera_pos(acceptor))); | |
13004 | } else if (pos < Substring_siteA_pos(acceptor)) { | |
13005 | debug4ld(printf("chimera_pos of donor %d < chimera_pos of acceptor %d\n",pos,Substring_siteA_pos(acceptor))); | |
13823 | 13006 | p = p->rest; |
13824 | } else if (pos > Substring_chimera_pos(acceptor)) { | |
13825 | debug4ld(printf("chimera_pos of donor %d > chimera_pos of acceptor %d\n",pos,Substring_chimera_pos(acceptor))); | |
13007 | } else if (pos > Substring_siteA_pos(acceptor)) { | |
13008 | debug4ld(printf("chimera_pos of donor %d > chimera_pos of acceptor %d\n",pos,Substring_siteA_pos(acceptor))); | |
13826 | 13009 | q = q->rest; |
13827 | 13010 | } else { |
13828 | 13011 | qsave = q; |
13829 | while (p != NULL /* && *nsplicepairs <= MAXCHIMERAPATHS */ && Substring_chimera_pos(((Substring_T) p->first)) == pos) { | |
13012 | while (p != NULL /* && *nsplicepairs <= MAXCHIMERAPATHS */ && Substring_siteD_pos(((Substring_T) p->first)) == pos) { | |
13830 | 13013 | donor = (Substring_T) p->first; |
13831 | 13014 | debug4ld(printf("donor at %llu, pos %d\n",(unsigned long long) Substring_genomicstart(donor),pos)); |
13832 | 13015 | q = qsave; |
13833 | while (q != NULL /* && *nsplicepairs <= MAXCHIMERAPATHS */ && Substring_chimera_pos(((Substring_T) q->first)) == pos) { | |
13016 | while (q != NULL /* && *nsplicepairs <= MAXCHIMERAPATHS */ && Substring_siteA_pos(((Substring_T) q->first)) == pos) { | |
13834 | 13017 | acceptor = (Substring_T) q->first; |
13835 | 13018 | debug4ld(printf("acceptor at %llu, pos %d\n",(unsigned long long) Substring_genomicstart(acceptor),pos)); |
13836 | 13019 | if (Substring_genomicstart(acceptor) == Substring_genomicstart(donor)) { |
13861 | 13044 | if (shortdistancep) { |
13862 | 13045 | *localsplicing = List_push(*localsplicing, |
13863 | 13046 | (void *) Stage3end_new_splice(&(*found_score),nmismatches1,nmismatches2, |
13864 | donor,acceptor,Substring_chimera_prob(donor),Substring_chimera_prob(acceptor),distance, | |
13047 | donor,acceptor,Substring_siteD_prob(donor),Substring_siteA_prob(acceptor),distance, | |
13865 | 13048 | /*shortdistancep*/true,localsplicing_penalty,querylength, |
13866 | 13049 | /*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL, |
13867 | 13050 | /*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL, |
13872 | 13055 | } else if (*ndistantsplicepairs <= MAXCHIMERAPATHS) { |
13873 | 13056 | distantsplicing = List_push(distantsplicing, |
13874 | 13057 | (void *) Stage3end_new_splice(&(*found_score),nmismatches1,nmismatches2, |
13875 | donor,acceptor,Substring_chimera_prob(donor),Substring_chimera_prob(acceptor),distance, | |
13058 | donor,acceptor,Substring_siteD_prob(donor),Substring_siteA_prob(acceptor),distance, | |
13876 | 13059 | /*shortdistancep*/false,distantsplicing_penalty,querylength, |
13877 | 13060 | /*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL, |
13878 | 13061 | /*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL, |
13905 | 13088 | (unsigned long long) Substring_genomicstart(acceptor), |
13906 | 13089 | Substring_querystart(acceptor),Substring_queryend(acceptor))); |
13907 | 13090 | |
13908 | if ((pos = Substring_chimera_pos(donor)) < min_endlength_2) { | |
13091 | if ((pos = Substring_siteD_pos(donor)) < min_endlength_2) { | |
13909 | 13092 | debug4ld(printf("chimera_pos of donor < min_endlength_2\n")); |
13910 | 13093 | p = p->rest; |
13911 | 13094 | } else if (pos > querylength - min_endlength_1) { |
13912 | 13095 | debug4ld(printf("chimera_pos of donor > querylength - min_endlength_1\n")); |
13913 | 13096 | p = p->rest; |
13914 | } else if (pos < Substring_chimera_pos(acceptor)) { | |
13915 | debug4ld(printf("chimera_pos of donor %d < chimera_pos of acceptor %d\n",pos,Substring_chimera_pos(acceptor))); | |
13097 | } else if (pos < Substring_siteA_pos(acceptor)) { | |
13098 | debug4ld(printf("chimera_pos of donor %d < chimera_pos of acceptor %d\n",pos,Substring_siteA_pos(acceptor))); | |
13916 | 13099 | p = p->rest; |
13917 | } else if (pos > Substring_chimera_pos(acceptor)) { | |
13918 | debug4ld(printf("chimera_pos of donor %d > chimera_pos of acceptor %d\n",pos,Substring_chimera_pos(acceptor))); | |
13100 | } else if (pos > Substring_siteA_pos(acceptor)) { | |
13101 | debug4ld(printf("chimera_pos of donor %d > chimera_pos of acceptor %d\n",pos,Substring_siteA_pos(acceptor))); | |
13919 | 13102 | q = q->rest; |
13920 | 13103 | } else { |
13921 | 13104 | qsave = q; |
13922 | 13105 | |
13923 | while (p != NULL /* && *nsplicepairs <= MAXCHIMERAPATHS */ && Substring_chimera_pos(((Substring_T) p->first)) == pos) { | |
13106 | while (p != NULL /* && *nsplicepairs <= MAXCHIMERAPATHS */ && Substring_siteD_pos(((Substring_T) p->first)) == pos) { | |
13924 | 13107 | donor = (Substring_T) p->first; |
13925 | 13108 | debug4ld(printf("donor at %llu, pos %d\n",(unsigned long long) Substring_genomicstart(donor),pos)); |
13926 | 13109 | q = qsave; |
13927 | while (q != NULL /* && *nsplicepairs <= MAXCHIMERAPATHS */ && Substring_chimera_pos(((Substring_T) q->first)) == pos) { | |
13110 | while (q != NULL /* && *nsplicepairs <= MAXCHIMERAPATHS */ && Substring_siteA_pos(((Substring_T) q->first)) == pos) { | |
13928 | 13111 | acceptor = (Substring_T) q->first; |
13929 | 13112 | debug4ld(printf("acceptor at %llu, pos %d\n",(unsigned long long) Substring_genomicstart(acceptor),pos)); |
13930 | 13113 | if (Substring_genomicstart(acceptor) == Substring_genomicstart(donor)) { |
13954 | 13137 | if (shortdistancep) { |
13955 | 13138 | *localsplicing = List_push(*localsplicing, |
13956 | 13139 | (void *) Stage3end_new_splice(&(*found_score),nmismatches1,nmismatches2, |
13957 | donor,acceptor,Substring_chimera_prob(donor),Substring_chimera_prob(acceptor),distance, | |
13140 | donor,acceptor,Substring_siteD_prob(donor),Substring_siteA_prob(acceptor),distance, | |
13958 | 13141 | /*shortdistancep*/true,localsplicing_penalty,querylength, |
13959 | 13142 | /*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL, |
13960 | 13143 | /*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL, |
13965 | 13148 | } else if (*ndistantsplicepairs <= MAXCHIMERAPATHS) { |
13966 | 13149 | distantsplicing = List_push(distantsplicing, |
13967 | 13150 | (void *) Stage3end_new_splice(&(*found_score),nmismatches1,nmismatches2, |
13968 | donor,acceptor,Substring_chimera_prob(donor),Substring_chimera_prob(acceptor),distance, | |
13151 | donor,acceptor,Substring_siteD_prob(donor),Substring_siteA_prob(acceptor),distance, | |
13969 | 13152 | /*shortdistancep*/false,distantsplicing_penalty,querylength, |
13970 | 13153 | /*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL, |
13971 | 13154 | /*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL, |
14003 | 13186 | (unsigned long long) Substring_genomicstart(acceptor), |
14004 | 13187 | Substring_querystart(acceptor),Substring_queryend(acceptor))); |
14005 | 13188 | |
14006 | if ((pos = Substring_chimera_pos(donor)) < min_endlength_1) { | |
13189 | if ((pos = Substring_siteD_pos(donor)) < min_endlength_1) { | |
14007 | 13190 | debug4ld(printf("chimera_pos of donor < min_endlength_1\n")); |
14008 | 13191 | p = p->rest; |
14009 | 13192 | } else if (pos > querylength - min_endlength_2) { |
14010 | 13193 | debug4ld(printf("chimera_pos of donor > querylength - min_endlength_2\n")); |
14011 | 13194 | p = p->rest; |
14012 | } else if (pos < Substring_chimera_pos(acceptor)) { | |
14013 | debug4ld(printf("chimera_pos of donor %d < chimera_pos of acceptor %d\n",pos,Substring_chimera_pos(acceptor))); | |
13195 | } else if (pos < Substring_siteA_pos(acceptor)) { | |
13196 | debug4ld(printf("chimera_pos of donor %d < chimera_pos of acceptor %d\n",pos,Substring_siteA_pos(acceptor))); | |
14014 | 13197 | p = p->rest; |
14015 | } else if (pos > Substring_chimera_pos(acceptor)) { | |
14016 | debug4ld(printf("chimera_pos of donor %d > chimera_pos of acceptor %d\n",pos,Substring_chimera_pos(acceptor))); | |
13198 | } else if (pos > Substring_siteA_pos(acceptor)) { | |
13199 | debug4ld(printf("chimera_pos of donor %d > chimera_pos of acceptor %d\n",pos,Substring_siteA_pos(acceptor))); | |
14017 | 13200 | q = q->rest; |
14018 | 13201 | } else { |
14019 | 13202 | qsave = q; |
14020 | while (p != NULL && *ndistantsplicepairs <= MAXCHIMERAPATHS && Substring_chimera_pos(((Substring_T) p->first)) == pos) { | |
13203 | while (p != NULL && *ndistantsplicepairs <= MAXCHIMERAPATHS && Substring_siteD_pos(((Substring_T) p->first)) == pos) { | |
14021 | 13204 | donor = (Substring_T) p->first; |
14022 | 13205 | debug4ld(printf("donor at %llu, pos %d\n",(unsigned long long) Substring_genomicstart(donor),pos)); |
14023 | 13206 | q = qsave; |
14024 | while (q != NULL && *ndistantsplicepairs <= MAXCHIMERAPATHS && Substring_chimera_pos(((Substring_T) q->first)) == pos) { | |
13207 | while (q != NULL && *ndistantsplicepairs <= MAXCHIMERAPATHS && Substring_siteA_pos(((Substring_T) q->first)) == pos) { | |
14025 | 13208 | acceptor = (Substring_T) q->first; |
14026 | 13209 | debug4ld(printf("acceptor at %llu, pos %d\n",(unsigned long long) Substring_genomicstart(acceptor),pos)); |
14027 | 13210 | if (Substring_chrnum(donor) != Substring_chrnum(acceptor)) { |
14037 | 13220 | (unsigned long long) Substring_genomicstart(acceptor))); |
14038 | 13221 | distantsplicing = List_push(distantsplicing, |
14039 | 13222 | (void *) Stage3end_new_splice(&(*found_score),nmismatches1,nmismatches2, |
14040 | donor,acceptor,Substring_chimera_prob(donor),Substring_chimera_prob(acceptor),distance, | |
13223 | donor,acceptor,Substring_siteD_prob(donor),Substring_siteA_prob(acceptor),distance, | |
14041 | 13224 | /*shortdistancep*/false,distantsplicing_penalty,querylength, |
14042 | 13225 | /*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL, |
14043 | 13226 | /*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL, |
14067 | 13250 | (unsigned long long) Substring_genomicstart(acceptor), |
14068 | 13251 | Substring_querystart(acceptor),Substring_queryend(acceptor))); |
14069 | 13252 | |
14070 | if ((pos = Substring_chimera_pos(donor)) < min_endlength_1) { | |
13253 | if ((pos = Substring_siteD_pos(donor)) < min_endlength_1) { | |
14071 | 13254 | debug4ld(printf("chimera_pos of donor < min_endlength_1\n")); |
14072 | 13255 | p = p->rest; |
14073 | 13256 | } else if (pos > querylength - min_endlength_2) { |
14074 | 13257 | debug4ld(printf("chimera_pos of donor > querylength - min_endlength_2\n")); |
14075 | 13258 | p = p->rest; |
14076 | } else if (pos < Substring_chimera_pos(acceptor)) { | |
14077 | debug4ld(printf("chimera_pos of donor %d < chimera_pos of acceptor %d\n",pos,Substring_chimera_pos(acceptor))); | |
13259 | } else if (pos < Substring_siteA_pos(acceptor)) { | |
13260 | debug4ld(printf("chimera_pos of donor %d < chimera_pos of acceptor %d\n",pos,Substring_siteA_pos(acceptor))); | |
14078 | 13261 | p = p->rest; |
14079 | } else if (pos > Substring_chimera_pos(acceptor)) { | |
14080 | debug4ld(printf("chimera_pos of donor %d > chimera_pos of acceptor %d\n",pos,Substring_chimera_pos(acceptor))); | |
13262 | } else if (pos > Substring_siteA_pos(acceptor)) { | |
13263 | debug4ld(printf("chimera_pos of donor %d > chimera_pos of acceptor %d\n",pos,Substring_siteA_pos(acceptor))); | |
14081 | 13264 | q = q->rest; |
14082 | 13265 | } else { |
14083 | 13266 | qsave = q; |
14084 | while (p != NULL && *ndistantsplicepairs <= MAXCHIMERAPATHS && Substring_chimera_pos(((Substring_T) p->first)) == pos) { | |
13267 | while (p != NULL && *ndistantsplicepairs <= MAXCHIMERAPATHS && Substring_siteD_pos(((Substring_T) p->first)) == pos) { | |
14085 | 13268 | donor = (Substring_T) p->first; |
14086 | 13269 | debug4ld(printf("donor at %llu, pos %d\n",(unsigned long long) Substring_genomicstart(donor),pos)); |
14087 | 13270 | q = qsave; |
14088 | while (q != NULL && *ndistantsplicepairs <= MAXCHIMERAPATHS && Substring_chimera_pos(((Substring_T) q->first)) == pos) { | |
13271 | while (q != NULL && *ndistantsplicepairs <= MAXCHIMERAPATHS && Substring_siteA_pos(((Substring_T) q->first)) == pos) { | |
14089 | 13272 | acceptor = (Substring_T) q->first; |
14090 | 13273 | debug4ld(printf("acceptor at %llu, pos %d\n",(unsigned long long) Substring_genomicstart(acceptor),pos)); |
14091 | 13274 | if (Substring_chrnum(donor) != Substring_chrnum(acceptor)) { |
14101 | 13284 | (unsigned long long) Substring_genomicstart(acceptor))); |
14102 | 13285 | distantsplicing = List_push(distantsplicing, |
14103 | 13286 | (void *) Stage3end_new_splice(&(*found_score),nmismatches1,nmismatches2, |
14104 | donor,acceptor,Substring_chimera_prob(donor),Substring_chimera_prob(acceptor),distance, | |
13287 | donor,acceptor,Substring_siteD_prob(donor),Substring_siteA_prob(acceptor),distance, | |
14105 | 13288 | /*shortdistancep*/false,distantsplicing_penalty,querylength, |
14106 | 13289 | /*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL, |
14107 | 13290 | /*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL, |
14132 | 13315 | (unsigned long long) Substring_genomicstart(acceptor), |
14133 | 13316 | Substring_querystart(acceptor),Substring_queryend(acceptor))); |
14134 | 13317 | |
14135 | if ((pos = Substring_chimera_pos(donor)) < min_endlength_2) { | |
13318 | if ((pos = Substring_siteD_pos(donor)) < min_endlength_2) { | |
14136 | 13319 | debug4ld(printf("chimera_pos of donor < min_endlength_2\n")); |
14137 | 13320 | p = p->rest; |
14138 | 13321 | } else if (pos > querylength - min_endlength_1) { |
14139 | 13322 | debug4ld(printf("chimera_pos of donor > querylength - min_endlength_1\n")); |
14140 | 13323 | p = p->rest; |
14141 | } else if (pos < Substring_chimera_pos(acceptor)) { | |
14142 | debug4ld(printf("chimera_pos of donor %d < chimera_pos of acceptor %d\n",pos,Substring_chimera_pos(acceptor))); | |
13324 | } else if (pos < Substring_siteA_pos(acceptor)) { | |
13325 | debug4ld(printf("chimera_pos of donor %d < chimera_pos of acceptor %d\n",pos,Substring_siteA_pos(acceptor))); | |
14143 | 13326 | p = p->rest; |
14144 | } else if (pos > Substring_chimera_pos(acceptor)) { | |
14145 | debug4ld(printf("chimera_pos of donor %d > chimera_pos of acceptor %d\n",pos,Substring_chimera_pos(acceptor))); | |
13327 | } else if (pos > Substring_siteA_pos(acceptor)) { | |
13328 | debug4ld(printf("chimera_pos of donor %d > chimera_pos of acceptor %d\n",pos,Substring_siteA_pos(acceptor))); | |
14146 | 13329 | q = q->rest; |
14147 | 13330 | } else { |
14148 | 13331 | qsave = q; |
14149 | while (p != NULL && *ndistantsplicepairs <= MAXCHIMERAPATHS && Substring_chimera_pos(((Substring_T) p->first)) == pos) { | |
13332 | while (p != NULL && *ndistantsplicepairs <= MAXCHIMERAPATHS && Substring_siteD_pos(((Substring_T) p->first)) == pos) { | |
14150 | 13333 | donor = (Substring_T) p->first; |
14151 | 13334 | debug4ld(printf("donor at %llu, pos %d\n",(unsigned long long) Substring_genomicstart(donor),pos)); |
14152 | 13335 | q = qsave; |
14153 | while (q != NULL && *ndistantsplicepairs <= MAXCHIMERAPATHS && Substring_chimera_pos(((Substring_T) q->first)) == pos) { | |
13336 | while (q != NULL && *ndistantsplicepairs <= MAXCHIMERAPATHS && Substring_siteA_pos(((Substring_T) q->first)) == pos) { | |
14154 | 13337 | acceptor = (Substring_T) q->first; |
14155 | 13338 | debug4ld(printf("acceptor at %llu, pos %d\n",(unsigned long long) Substring_genomicstart(acceptor),pos)); |
14156 | 13339 | if (Substring_chrnum(donor) != Substring_chrnum(acceptor)) { |
14166 | 13349 | (unsigned long long) Substring_genomicstart(acceptor))); |
14167 | 13350 | distantsplicing = List_push(distantsplicing, |
14168 | 13351 | (void *) Stage3end_new_splice(&(*found_score),nmismatches1,nmismatches2, |
14169 | donor,acceptor,Substring_chimera_prob(donor),Substring_chimera_prob(acceptor),distance, | |
13352 | donor,acceptor,Substring_siteD_prob(donor),Substring_siteA_prob(acceptor),distance, | |
14170 | 13353 | /*shortdistancep*/false,distantsplicing_penalty,querylength, |
14171 | 13354 | /*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL, |
14172 | 13355 | /*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL, |
14196 | 13379 | (unsigned long long) Substring_genomicstart(acceptor), |
14197 | 13380 | Substring_querystart(acceptor),Substring_queryend(acceptor))); |
14198 | 13381 | |
14199 | if ((pos = Substring_chimera_pos(donor)) < min_endlength_2) { | |
13382 | if ((pos = Substring_siteD_pos(donor)) < min_endlength_2) { | |
14200 | 13383 | debug4ld(printf("chimera_pos of donor < min_endlength_2\n")); |
14201 | 13384 | p = p->rest; |
14202 | 13385 | } else if (pos > querylength - min_endlength_1) { |
14203 | 13386 | debug4ld(printf("chimera_pos of donor > querylength - min_endlength_1\n")); |
14204 | 13387 | p = p->rest; |
14205 | } else if (pos < Substring_chimera_pos(acceptor)) { | |
14206 | debug4ld(printf("chimera_pos of donor %d < chimera_pos of acceptor %d\n",pos,Substring_chimera_pos(acceptor))); | |
13388 | } else if (pos < Substring_siteA_pos(acceptor)) { | |
13389 | debug4ld(printf("chimera_pos of donor %d < chimera_pos of acceptor %d\n",pos,Substring_siteA_pos(acceptor))); | |
14207 | 13390 | p = p->rest; |
14208 | } else if (pos > Substring_chimera_pos(acceptor)) { | |
14209 | debug4ld(printf("chimera_pos of donor %d > chimera_pos of acceptor %d\n",pos,Substring_chimera_pos(acceptor))); | |
13391 | } else if (pos > Substring_siteA_pos(acceptor)) { | |
13392 | debug4ld(printf("chimera_pos of donor %d > chimera_pos of acceptor %d\n",pos,Substring_siteA_pos(acceptor))); | |
14210 | 13393 | q = q->rest; |
14211 | 13394 | } else { |
14212 | 13395 | qsave = q; |
14213 | while (p != NULL && *ndistantsplicepairs <= MAXCHIMERAPATHS && Substring_chimera_pos(((Substring_T) p->first)) == pos) { | |
13396 | while (p != NULL && *ndistantsplicepairs <= MAXCHIMERAPATHS && Substring_siteD_pos(((Substring_T) p->first)) == pos) { | |
14214 | 13397 | donor = (Substring_T) p->first; |
14215 | 13398 | debug4ld(printf("donor at %llu, pos %d\n",(unsigned long long) Substring_genomicstart(donor),pos)); |
14216 | 13399 | q = qsave; |
14217 | while (q != NULL && *ndistantsplicepairs <= MAXCHIMERAPATHS && Substring_chimera_pos(((Substring_T) q->first)) == pos) { | |
13400 | while (q != NULL && *ndistantsplicepairs <= MAXCHIMERAPATHS && Substring_siteA_pos(((Substring_T) q->first)) == pos) { | |
14218 | 13401 | acceptor = (Substring_T) q->first; |
14219 | 13402 | debug4ld(printf("acceptor at %llu, pos %d\n",(unsigned long long) Substring_genomicstart(acceptor),pos)); |
14220 | 13403 | if (Substring_chrnum(donor) != Substring_chrnum(acceptor)) { |
14230 | 13413 | (unsigned long long) Substring_genomicstart(acceptor))); |
14231 | 13414 | distantsplicing = List_push(distantsplicing, |
14232 | 13415 | (void *) Stage3end_new_splice(&(*found_score),nmismatches1,nmismatches2, |
14233 | donor,acceptor,Substring_chimera_prob(donor),Substring_chimera_prob(acceptor),distance, | |
13416 | donor,acceptor,Substring_siteD_prob(donor),Substring_siteA_prob(acceptor),distance, | |
14234 | 13417 | /*shortdistancep*/false,distantsplicing_penalty,querylength, |
14235 | 13418 | /*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL, |
14236 | 13419 | /*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL, |
14319 | 13502 | /* End 1 */ |
14320 | 13503 | for (p = donors_plus[nmismatches]; p != NULL; p = p->rest) { |
14321 | 13504 | donor = (Substring_T) p->first; |
14322 | support = Substring_chimera_pos(donor); | |
13505 | support = Substring_siteD_pos(donor); | |
14323 | 13506 | endlength = querylength - support; |
14324 | 13507 | chrhigh = Substring_chrhigh(donor); |
14325 | 13508 | |
14334 | 13517 | debug4h(printf("End 1: short-overlap donor_plus: #%d:%u (%d mismatches) => searching right\n", |
14335 | 13518 | Substring_chrnum(donor),(Chrpos_T) (leftbound-1-chroffset),Substring_nmismatches_whole(donor))); |
14336 | 13519 | |
14337 | if ((i = Substring_splicesites_knowni(donor)) >= 0) { | |
13520 | if ((i = Substring_splicesitesD_knowni(donor)) >= 0) { | |
14338 | 13521 | origleft = Substring_genomicstart(donor); |
14339 | 13522 | if ((splicesites_i = |
14340 | 13523 | Splicetrie_find_right(&nmismatches_shortend,&nmismatches_list,i, |
14347 | 13530 | ambcoords = lookup_splicesites(&probs_list,splicesites_i,splicesites); |
14348 | 13531 | debug4h(amb_length = endlength /*- nmismatches_shortend*/); |
14349 | 13532 | debug4h(printf("End 1: short-overlap donor_plus: Successful ambiguous from donor #%d with amb_length %d\n", |
14350 | Substring_splicesites_knowni(donor),amb_length)); | |
13533 | Substring_splicesitesD_knowni(donor),amb_length)); | |
14351 | 13534 | hits = List_push(hits,(void *) Stage3end_new_splice(&(*found_score),nmismatches,nmismatches_shortend, |
14352 | donor,/*acceptor*/NULL,Substring_chimera_prob(donor),Doublelist_max(probs_list),/*distance*/0U, | |
13535 | donor,/*acceptor*/NULL,Substring_siteD_prob(donor),Doublelist_max(probs_list),/*distance*/0U, | |
14353 | 13536 | /*shortdistancep*/false,/*penalty*/0,querylength, |
14354 | 13537 | /*ambcoords_donor*/NULL,ambcoords, |
14355 | 13538 | /*ambi_donor*/NULL,/*ambi_acceptor*/splicesites_i, |
14367 | 13550 | bestj = Intlist_head(splicesites_i); |
14368 | 13551 | bestleft = splicesites[bestj] - support; |
14369 | 13552 | if ((acceptor = Substring_new_acceptor(/*acceptor_coord*/splicesites[bestj],/*acceptor_knowni*/bestj, |
14370 | Substring_chimera_pos(donor),/*substring_querystart*/0,/*substring_queryend*/querylength, | |
13553 | Substring_siteD_pos(donor),/*substring_querystart*/0,/*substring_queryend*/querylength, | |
14371 | 13554 | nmismatches_shortend,/*prob*/2.0,/*left*/bestleft,query_compress_fwd, |
14372 | 13555 | querylength,/*plusp*/true,genestrand,/*sensedir*/SENSE_FORWARD, |
14373 | 13556 | Substring_chrnum(donor),Substring_chroffset(donor), |
14374 | 13557 | Substring_chrhigh(donor),Substring_chrlength(donor))) != NULL) { |
14375 | 13558 | debug4h(printf("End 1: short-overlap donor_plus: Successful splice from donor #%d to acceptor #%d\n", |
14376 | Substring_splicesites_knowni(donor),Substring_splicesites_knowni(acceptor))); | |
13559 | Substring_splicesitesD_knowni(donor),Substring_splicesitesA_knowni(acceptor))); | |
14377 | 13560 | hits = List_push(hits,(void *) Stage3end_new_splice(&(*found_score),nmismatches,nmismatches_shortend, |
14378 | donor,acceptor,Substring_chimera_prob(donor),/*acceptor_prob*/2.0,/*distance*/bestleft-origleft, | |
13561 | donor,acceptor,Substring_siteD_prob(donor),/*acceptor_prob*/2.0,/*distance*/bestleft-origleft, | |
14379 | 13562 | /*shortdistancep*/true,localsplicing_penalty,querylength, |
14380 | 13563 | /*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL, |
14381 | 13564 | /*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL, |
14395 | 13578 | /* End 2 */ |
14396 | 13579 | for (p = acceptors_plus[nmismatches]; p != NULL; p = p->rest) { |
14397 | 13580 | acceptor = (Substring_T) p->first; |
14398 | endlength = Substring_chimera_pos(acceptor); | |
13581 | endlength = Substring_siteA_pos(acceptor); | |
14399 | 13582 | support = querylength - endlength; |
14400 | 13583 | chroffset = Substring_chroffset(acceptor); |
14401 | 13584 | |
14410 | 13593 | debug4h(printf("End 2: short-overlap acceptor_plus: #%d:%u (%d mismatches) => searching left\n", |
14411 | 13594 | Substring_chrnum(acceptor),(Chrpos_T) (rightbound+1-chroffset),Substring_nmismatches_whole(acceptor))); |
14412 | 13595 | |
14413 | if ((i = Substring_splicesites_knowni(acceptor)) >= 0) { | |
13596 | if ((i = Substring_splicesitesA_knowni(acceptor)) >= 0) { | |
14414 | 13597 | origleft = Substring_genomicstart(acceptor); |
14415 | 13598 | if ((splicesites_i = |
14416 | 13599 | Splicetrie_find_left(&nmismatches_shortend,&nmismatches_list,i, |
14423 | 13606 | ambcoords = lookup_splicesites(&probs_list,splicesites_i,splicesites); |
14424 | 13607 | debug4h(amb_length = endlength /*- nmismatches_shortend*/); |
14425 | 13608 | debug4h(printf("End 2: short-overlap acceptor_plus: Successful ambiguous from acceptor #%d with amb_length %d\n", |
14426 | Substring_splicesites_knowni(acceptor),amb_length)); | |
13609 | Substring_splicesitesA_knowni(acceptor),amb_length)); | |
14427 | 13610 | hits = List_push(hits,(void *) Stage3end_new_splice(&(*found_score),nmismatches_shortend,nmismatches, |
14428 | /*donor*/NULL,acceptor,Doublelist_max(probs_list),Substring_chimera_prob(acceptor),/*distance*/0U, | |
13611 | /*donor*/NULL,acceptor,Doublelist_max(probs_list),Substring_siteA_prob(acceptor),/*distance*/0U, | |
14429 | 13612 | /*shortdistancep*/false,/*penalty*/0,querylength, |
14430 | 13613 | ambcoords,/*ambcoords_acceptor*/NULL, |
14431 | 13614 | /*amb_knowni_donor*/splicesites_i,/*amb_knowni_acceptor*/NULL, |
14443 | 13626 | bestj = Intlist_head(splicesites_i); |
14444 | 13627 | bestleft = splicesites[bestj] - endlength; |
14445 | 13628 | if ((donor = Substring_new_donor(/*donor_coord*/splicesites[bestj],/*donor_knowni*/bestj, |
14446 | Substring_chimera_pos(acceptor),/*substring_querystart*/0,/*substring_queryend*/querylength, | |
13629 | Substring_siteA_pos(acceptor),/*substring_querystart*/0,/*substring_queryend*/querylength, | |
14447 | 13630 | nmismatches_shortend,/*prob*/2.0,/*left*/bestleft,query_compress_fwd, |
14448 | 13631 | querylength,/*plusp*/true,genestrand,/*sensedir*/SENSE_FORWARD, |
14449 | 13632 | Substring_chrnum(acceptor),Substring_chroffset(acceptor), |
14450 | 13633 | Substring_chrhigh(acceptor),Substring_chrlength(acceptor))) != NULL) { |
14451 | 13634 | debug4h(printf("End 2: short-overlap acceptor_plus: Successful splice from acceptor #%d to donor #%d\n", |
14452 | Substring_splicesites_knowni(acceptor),Substring_splicesites_knowni(donor))); | |
13635 | Substring_splicesitesA_knowni(acceptor),Substring_splicesitesD_knowni(donor))); | |
14453 | 13636 | hits = List_push(hits,(void *) Stage3end_new_splice(&(*found_score),nmismatches_shortend,nmismatches, |
14454 | donor,acceptor,/*donor_prob*/2.0,Substring_chimera_prob(acceptor),/*distance*/origleft-bestleft, | |
13637 | donor,acceptor,/*donor_prob*/2.0,Substring_siteA_prob(acceptor),/*distance*/origleft-bestleft, | |
14455 | 13638 | /*shortdistancep*/true,localsplicing_penalty,querylength, |
14456 | 13639 | /*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL, |
14457 | 13640 | /*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL, |
14471 | 13654 | /* End 3 */ |
14472 | 13655 | for (p = donors_minus[nmismatches]; p != NULL; p = p->rest) { |
14473 | 13656 | donor = (Substring_T) p->first; |
14474 | support = Substring_chimera_pos(donor); | |
13657 | support = Substring_siteD_pos(donor); | |
14475 | 13658 | endlength = querylength - support; |
14476 | 13659 | chroffset = Substring_chroffset(donor); |
14477 | 13660 | |
14486 | 13669 | debug4h(printf("End 3: short-overlap donor_minus: #%d:%u (%d mismatches) => searching left\n", |
14487 | 13670 | Substring_chrnum(donor),(Chrpos_T) (rightbound+1-chroffset),Substring_nmismatches_whole(donor))); |
14488 | 13671 | |
14489 | if ((i = Substring_splicesites_knowni(donor)) >= 0) { | |
13672 | if ((i = Substring_splicesitesD_knowni(donor)) >= 0) { | |
14490 | 13673 | origleft = Substring_genomicend(donor); |
14491 | 13674 | if ((splicesites_i = |
14492 | 13675 | Splicetrie_find_left(&nmismatches_shortend,&nmismatches_list,i, |
14499 | 13682 | ambcoords = lookup_splicesites(&probs_list,splicesites_i,splicesites); |
14500 | 13683 | debug4h(amb_length = endlength /*- nmismatches_shortend*/); |
14501 | 13684 | debug4h(printf("End 3: short-overlap donor_minus: Successful ambiguous from donor #%d with amb_length %d\n", |
14502 | Substring_splicesites_knowni(donor),amb_length)); | |
13685 | Substring_splicesitesD_knowni(donor),amb_length)); | |
14503 | 13686 | hits = List_push(hits,(void *) Stage3end_new_splice(&(*found_score),nmismatches,nmismatches_shortend, |
14504 | donor,/*acceptor*/NULL,Substring_chimera_prob(donor),Doublelist_max(probs_list),/*distance*/0U, | |
13687 | donor,/*acceptor*/NULL,Substring_siteD_prob(donor),Doublelist_max(probs_list),/*distance*/0U, | |
14505 | 13688 | /*shortdistancep*/false,/*penalty*/0,querylength, |
14506 | 13689 | /*ambcoords_donor*/NULL,ambcoords, |
14507 | 13690 | /*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/splicesites_i, |
14519 | 13702 | bestj = Intlist_head(splicesites_i); |
14520 | 13703 | bestleft = splicesites[bestj] - endlength; |
14521 | 13704 | if ((acceptor = Substring_new_acceptor(/*acceptor_coord*/splicesites[bestj],/*acceptor_knowni*/bestj, |
14522 | querylength-Substring_chimera_pos(donor), | |
13705 | querylength-Substring_siteD_pos(donor), | |
14523 | 13706 | /*substring_querystart*/0,/*substring_queryend*/querylength, |
14524 | 13707 | nmismatches_shortend,/*prob*/2.0,/*left*/bestleft,query_compress_rev, |
14525 | 13708 | querylength,/*plusp*/false,genestrand,/*sensedir*/SENSE_FORWARD, |
14526 | 13709 | Substring_chrnum(donor),Substring_chroffset(donor), |
14527 | 13710 | Substring_chrhigh(donor),Substring_chrlength(donor))) != NULL) { |
14528 | 13711 | debug4h(printf("End 3: short-overlap donor_minus: Successful splice from donor #%d to acceptor #%d\n", |
14529 | Substring_splicesites_knowni(donor),Substring_splicesites_knowni(acceptor))); | |
13712 | Substring_splicesitesD_knowni(donor),Substring_splicesitesA_knowni(acceptor))); | |
14530 | 13713 | hits = List_push(hits,(void *) Stage3end_new_splice(&(*found_score),nmismatches,nmismatches_shortend, |
14531 | donor,acceptor,Substring_chimera_prob(donor),/*acceptor_prob*/2.0,/*distance*/origleft-bestleft, | |
13714 | donor,acceptor,Substring_siteD_prob(donor),/*acceptor_prob*/2.0,/*distance*/origleft-bestleft, | |
14532 | 13715 | /*shortdistancep*/true,localsplicing_penalty,querylength, |
14533 | 13716 | /*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL, |
14534 | 13717 | /*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL, |
14548 | 13731 | /* End 4 */ |
14549 | 13732 | for (p = acceptors_minus[nmismatches]; p != NULL; p = p->rest) { |
14550 | 13733 | acceptor = (Substring_T) p->first; |
14551 | endlength = Substring_chimera_pos(acceptor); | |
13734 | endlength = Substring_siteA_pos(acceptor); | |
14552 | 13735 | support = querylength - endlength; |
14553 | 13736 | chrhigh = Substring_chrhigh(acceptor); |
14554 | 13737 | |
14564 | 13747 | debug4h(printf("End 4: short-overlap acceptor_minus: #%d:%u (%d mismatches) => searching right\n", |
14565 | 13748 | Substring_chrnum(acceptor),(Chrpos_T) (leftbound-1-chroffset),Substring_nmismatches_whole(acceptor))); |
14566 | 13749 | |
14567 | if ((i = Substring_splicesites_knowni(acceptor)) >= 0) { | |
13750 | if ((i = Substring_splicesitesA_knowni(acceptor)) >= 0) { | |
14568 | 13751 | origleft = Substring_genomicend(acceptor); |
14569 | 13752 | if ((splicesites_i = |
14570 | 13753 | Splicetrie_find_right(&nmismatches_shortend,&nmismatches_list,i, |
14577 | 13760 | ambcoords = lookup_splicesites(&probs_list,splicesites_i,splicesites); |
14578 | 13761 | debug4h(amb_length = endlength /*- nmismatches_shortend*/); |
14579 | 13762 | debug4h(printf("End 4: short-overlap acceptor_minus: Successful ambiguous from acceptor #%d with amb_length %d\n", |
14580 | Substring_splicesites_knowni(acceptor),amb_length)); | |
13763 | Substring_splicesitesA_knowni(acceptor),amb_length)); | |
14581 | 13764 | hits = List_push(hits,(void *) Stage3end_new_splice(&(*found_score),nmismatches_shortend,nmismatches, |
14582 | /*donor*/NULL,acceptor,Doublelist_max(probs_list),Substring_chimera_prob(acceptor),/*distance*/0U, | |
13765 | /*donor*/NULL,acceptor,Doublelist_max(probs_list),Substring_siteA_prob(acceptor),/*distance*/0U, | |
14583 | 13766 | /*shortdistancep*/false,/*penalty*/0,querylength, |
14584 | 13767 | ambcoords,/*ambcoords_acceptor*/NULL, |
14585 | 13768 | /*amb_knowni_donor*/splicesites_i,/*amb_knowni_acceptor*/NULL, |
14597 | 13780 | bestj = Intlist_head(splicesites_i); |
14598 | 13781 | bestleft = splicesites[bestj] - support; |
14599 | 13782 | if ((donor = Substring_new_donor(/*donor_coord*/splicesites[bestj],/*donor_knowni*/bestj, |
14600 | querylength-Substring_chimera_pos(acceptor), | |
13783 | querylength-Substring_siteA_pos(acceptor), | |
14601 | 13784 | /*substring_querystart*/0,/*substring_queryend*/querylength, |
14602 | 13785 | nmismatches_shortend,/*prob*/2.0,/*left*/bestleft,query_compress_rev, |
14603 | 13786 | querylength,/*plusp*/false,genestrand,/*sensedir*/SENSE_FORWARD, |
14604 | 13787 | Substring_chrnum(acceptor),Substring_chroffset(acceptor), |
14605 | 13788 | Substring_chrhigh(acceptor),Substring_chrlength(acceptor))) != NULL) { |
14606 | 13789 | debug4h(printf("End 4: short-overlap acceptor_minus: Successful splice from acceptor #%d to #%d\n", |
14607 | Substring_splicesites_knowni(acceptor),Substring_splicesites_knowni(donor))); | |
13790 | Substring_splicesitesA_knowni(acceptor),Substring_splicesitesD_knowni(donor))); | |
14608 | 13791 | hits = List_push(hits,(void *) Stage3end_new_splice(&(*found_score),nmismatches_shortend,nmismatches, |
14609 | donor,acceptor,/*donor_prob*/2.0,Substring_chimera_prob(acceptor),/*distance*/bestleft-origleft, | |
13792 | donor,acceptor,/*donor_prob*/2.0,Substring_siteA_prob(acceptor),/*distance*/bestleft-origleft, | |
14610 | 13793 | /*shortdistancep*/true,localsplicing_penalty,querylength, |
14611 | 13794 | /*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL, |
14612 | 13795 | /*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL, |
14626 | 13809 | /* End 5 */ |
14627 | 13810 | for (p = antidonors_plus[nmismatches]; p != NULL; p = p->rest) { |
14628 | 13811 | donor = (Substring_T) p->first; |
14629 | endlength = Substring_chimera_pos(donor); | |
13812 | endlength = Substring_siteD_pos(donor); | |
14630 | 13813 | support = querylength - endlength; |
14631 | 13814 | chroffset = Substring_chroffset(donor); |
14632 | 13815 | |
14641 | 13824 | debug4h(printf("End 5: short-overlap antidonor_plus: #%d:%u (%d mismatches) => searching left\n", |
14642 | 13825 | Substring_chrnum(donor),(Chrpos_T) (rightbound+1-chroffset),Substring_nmismatches_whole(donor))); |
14643 | 13826 | |
14644 | if ((i = Substring_splicesites_knowni(donor)) >= 0) { | |
13827 | if ((i = Substring_splicesitesD_knowni(donor)) >= 0) { | |
14645 | 13828 | origleft = Substring_genomicstart(donor); |
14646 | 13829 | if ((splicesites_i = |
14647 | 13830 | Splicetrie_find_left(&nmismatches_shortend,&nmismatches_list,i, |
14654 | 13837 | ambcoords = lookup_splicesites(&probs_list,splicesites_i,splicesites); |
14655 | 13838 | debug4h(amb_length = endlength /*- nmismatches_shortend*/); |
14656 | 13839 | debug4h(printf("End 5: short-overlap antidonor_plus: Successful ambiguous from antidonor #%d with amb_length %d\n", |
14657 | Substring_splicesites_knowni(donor),amb_length)); | |
13840 | Substring_splicesitesD_knowni(donor),amb_length)); | |
14658 | 13841 | hits = List_push(hits,(void *) Stage3end_new_splice(&(*found_score),nmismatches,nmismatches_shortend, |
14659 | donor,/*acceptor*/NULL,Substring_chimera_prob(donor),Doublelist_max(probs_list),/*distance*/0U, | |
13842 | donor,/*acceptor*/NULL,Substring_siteD_prob(donor),Doublelist_max(probs_list),/*distance*/0U, | |
14660 | 13843 | /*shortdistancep*/false,/*penalty*/0,querylength, |
14661 | 13844 | /*ambcoords_donor*/NULL,ambcoords, |
14662 | 13845 | /*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/splicesites_i, |
14674 | 13857 | bestj = Intlist_head(splicesites_i); |
14675 | 13858 | bestleft = splicesites[bestj] - endlength; |
14676 | 13859 | if ((acceptor = Substring_new_acceptor(/*acceptor_coord*/splicesites[bestj],/*acceptor_knowni*/bestj, |
14677 | Substring_chimera_pos(donor), | |
13860 | Substring_siteD_pos(donor), | |
14678 | 13861 | /*substring_querystart*/0,/*substring_queryend*/querylength, |
14679 | 13862 | nmismatches_shortend,/*prob*/2.0,/*left*/bestleft,query_compress_fwd, |
14680 | 13863 | querylength,/*plusp*/true,genestrand,/*sensedir*/SENSE_ANTI, |
14681 | 13864 | Substring_chrnum(donor),Substring_chroffset(donor), |
14682 | 13865 | Substring_chrhigh(donor),Substring_chrlength(donor))) != NULL) { |
14683 | 13866 | debug4h(printf("End 5: short-overlap antidonor_plus: Successful splice from antidonor #%d to antiacceptor #%d\n", |
14684 | Substring_splicesites_knowni(donor),Substring_splicesites_knowni(acceptor))); | |
13867 | Substring_splicesitesD_knowni(donor),Substring_splicesitesA_knowni(acceptor))); | |
14685 | 13868 | hits = List_push(hits,(void *) Stage3end_new_splice(&(*found_score),nmismatches,nmismatches_shortend, |
14686 | donor,acceptor,Substring_chimera_prob(donor),/*acceptor_prob*/2.0,/*distance*/origleft-bestleft, | |
13869 | donor,acceptor,Substring_siteD_prob(donor),/*acceptor_prob*/2.0,/*distance*/origleft-bestleft, | |
14687 | 13870 | /*shortdistancep*/true,localsplicing_penalty,querylength, |
14688 | 13871 | /*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL, |
14689 | 13872 | /*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL, |
14703 | 13886 | /* End 6 */ |
14704 | 13887 | for (p = antiacceptors_plus[nmismatches]; p != NULL; p = p->rest) { |
14705 | 13888 | acceptor = (Substring_T) p->first; |
14706 | support = Substring_chimera_pos(acceptor); | |
13889 | support = Substring_siteA_pos(acceptor); | |
14707 | 13890 | endlength = querylength - support; |
14708 | 13891 | chrhigh = Substring_chrhigh(acceptor); |
14709 | 13892 | |
14719 | 13902 | debug4h(printf("End 6: short-overlap antiacceptor_plus: #%d:%u (%d mismatches) => searching right\n", |
14720 | 13903 | Substring_chrnum(acceptor),(Chrpos_T) (leftbound-1-chroffset),Substring_nmismatches_whole(acceptor))); |
14721 | 13904 | |
14722 | if ((i = Substring_splicesites_knowni(acceptor)) >= 0) { | |
13905 | if ((i = Substring_splicesitesA_knowni(acceptor)) >= 0) { | |
14723 | 13906 | origleft = Substring_genomicstart(acceptor); |
14724 | 13907 | if ((splicesites_i = |
14725 | 13908 | Splicetrie_find_right(&nmismatches_shortend,&nmismatches_list,i, |
14732 | 13915 | ambcoords = lookup_splicesites(&probs_list,splicesites_i,splicesites); |
14733 | 13916 | debug4h(amb_length = endlength /*- nmismatches_shortend*/); |
14734 | 13917 | debug4h(printf("End 6: short-overlap antiacceptor_plus: Successful ambiguous from antiacceptor #%d with amb_length %d\n", |
14735 | Substring_splicesites_knowni(acceptor),amb_length)); | |
13918 | Substring_splicesitesA_knowni(acceptor),amb_length)); | |
14736 | 13919 | hits = List_push(hits,(void *) Stage3end_new_splice(&(*found_score),nmismatches_shortend,nmismatches, |
14737 | /*donor*/NULL,acceptor,Doublelist_max(probs_list),Substring_chimera_prob(acceptor),/*distance*/0U, | |
13920 | /*donor*/NULL,acceptor,Doublelist_max(probs_list),Substring_siteA_prob(acceptor),/*distance*/0U, | |
14738 | 13921 | /*shortdistancep*/false,/*penalty*/0,querylength, |
14739 | 13922 | ambcoords,/*ambcoords_acceptor*/NULL, |
14740 | 13923 | /*amb_knowni_donor*/splicesites_i,/*amb_knowni_acceptor*/NULL, |
14752 | 13935 | bestj = Intlist_head(splicesites_i); |
14753 | 13936 | bestleft = splicesites[bestj] - support; |
14754 | 13937 | if ((donor = Substring_new_donor(/*donor_coord*/splicesites[bestj],/*donor_knowni*/bestj, |
14755 | Substring_chimera_pos(acceptor), | |
13938 | Substring_siteA_pos(acceptor), | |
14756 | 13939 | /*substring_querystart*/0,/*substring_queryend*/querylength, |
14757 | 13940 | nmismatches_shortend,/*prob*/2.0,/*left*/bestleft,query_compress_fwd, |
14758 | 13941 | querylength,/*plusp*/true,genestrand,/*sensedir*/SENSE_ANTI, |
14759 | 13942 | Substring_chrnum(acceptor),Substring_chroffset(acceptor), |
14760 | 13943 | Substring_chrhigh(acceptor),Substring_chrlength(acceptor))) != NULL) { |
14761 | 13944 | debug4h(printf("End 6: short-overlap antiacceptor_plus: Successful splice from antiacceptor #%d to antidonor #%d\n", |
14762 | Substring_splicesites_knowni(acceptor),Substring_splicesites_knowni(donor))); | |
13945 | Substring_splicesitesA_knowni(acceptor),Substring_splicesitesD_knowni(donor))); | |
14763 | 13946 | hits = List_push(hits,(void *) Stage3end_new_splice(&(*found_score),nmismatches_shortend,nmismatches, |
14764 | donor,acceptor,/*donor_prob*/2.0,Substring_chimera_prob(acceptor),/*distance*/bestleft-origleft, | |
13947 | donor,acceptor,/*donor_prob*/2.0,Substring_siteA_prob(acceptor),/*distance*/bestleft-origleft, | |
14765 | 13948 | /*shortdistancep*/true,localsplicing_penalty,querylength, |
14766 | 13949 | /*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL, |
14767 | 13950 | /*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL, |
14781 | 13964 | /* End 7 */ |
14782 | 13965 | for (p = antidonors_minus[nmismatches]; p != NULL; p = p->rest) { |
14783 | 13966 | donor = (Substring_T) p->first; |
14784 | endlength = Substring_chimera_pos(donor); | |
13967 | endlength = Substring_siteD_pos(donor); | |
14785 | 13968 | support = querylength - endlength; |
14786 | 13969 | chrhigh = Substring_chrhigh(donor); |
14787 | 13970 | |
14797 | 13980 | debug4h(printf("End 7: short-overlap antidonor_minus: #%d:%u (%d mismatches) => searching right\n", |
14798 | 13981 | Substring_chrnum(donor),(Chrpos_T) (leftbound-1-chroffset),Substring_nmismatches_whole(donor))); |
14799 | 13982 | |
14800 | if ((i = Substring_splicesites_knowni(donor)) >= 0) { | |
13983 | if ((i = Substring_splicesitesD_knowni(donor)) >= 0) { | |
14801 | 13984 | origleft = Substring_genomicend(donor); |
14802 | 13985 | if ((splicesites_i = |
14803 | 13986 | Splicetrie_find_right(&nmismatches_shortend,&nmismatches_list,i, |
14810 | 13993 | ambcoords = lookup_splicesites(&probs_list,splicesites_i,splicesites); |
14811 | 13994 | debug4h(amb_length = endlength /*- nmismatches_shortend*/); |
14812 | 13995 | debug4h(printf("End 7: short-overlap antidonor_minus: Successful ambiguous from antidonor #%d with amb_length %d\n", |
14813 | Substring_splicesites_knowni(donor),amb_length)); | |
13996 | Substring_splicesitesD_knowni(donor),amb_length)); | |
14814 | 13997 | hits = List_push(hits,(void *) Stage3end_new_splice(&(*found_score),nmismatches,nmismatches_shortend, |
14815 | donor,/*acceptor*/NULL,Substring_chimera_prob(donor),Doublelist_max(probs_list),/*distance*/0U, | |
13998 | donor,/*acceptor*/NULL,Substring_siteD_prob(donor),Doublelist_max(probs_list),/*distance*/0U, | |
14816 | 13999 | /*shortdistancep*/false,/*penalty*/0,querylength, |
14817 | 14000 | /*ambcoords_donor*/NULL,ambcoords, |
14818 | 14001 | /*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/splicesites_i, |
14830 | 14013 | bestj = Intlist_head(splicesites_i); |
14831 | 14014 | bestleft = splicesites[bestj] - support; |
14832 | 14015 | if ((acceptor = Substring_new_acceptor(/*acceptor_coord*/splicesites[bestj],/*acceptor_knowni*/bestj, |
14833 | querylength-Substring_chimera_pos(donor), | |
14016 | querylength-Substring_siteD_pos(donor), | |
14834 | 14017 | /*substring_querystart*/0,/*substring_queryend*/querylength, |
14835 | 14018 | nmismatches_shortend,/*prob*/2.0,/*left*/bestleft,query_compress_rev, |
14836 | 14019 | querylength,/*plusp*/false,genestrand,/*sensedir*/SENSE_ANTI, |
14837 | 14020 | Substring_chrnum(donor),Substring_chroffset(donor), |
14838 | 14021 | Substring_chrhigh(donor),Substring_chrlength(donor))) != NULL) { |
14839 | 14022 | debug4h(printf("End 7: short-overlap antidonor_minus: Successful splice from antidonor #%d to antiacceptor #%d\n", |
14840 | Substring_splicesites_knowni(donor),Substring_splicesites_knowni(acceptor))); | |
14023 | Substring_splicesitesD_knowni(donor),Substring_splicesitesA_knowni(acceptor))); | |
14841 | 14024 | hits = List_push(hits,(void *) Stage3end_new_splice(&(*found_score),nmismatches,nmismatches_shortend, |
14842 | donor,acceptor,Substring_chimera_prob(donor),/*acceptor_prob*/2.0,/*distance*/bestleft-origleft, | |
14025 | donor,acceptor,Substring_siteD_prob(donor),/*acceptor_prob*/2.0,/*distance*/bestleft-origleft, | |
14843 | 14026 | /*shortdistancep*/true,localsplicing_penalty,querylength, |
14844 | 14027 | /*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL, |
14845 | 14028 | /*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL, |
14859 | 14042 | /* End 8 */ |
14860 | 14043 | for (p = antiacceptors_minus[nmismatches]; p != NULL; p = p->rest) { |
14861 | 14044 | acceptor = (Substring_T) p->first; |
14862 | support = Substring_chimera_pos(acceptor); | |
14045 | support = Substring_siteA_pos(acceptor); | |
14863 | 14046 | endlength = querylength - support; |
14864 | 14047 | chroffset = Substring_chroffset(acceptor); |
14865 | 14048 | |
14874 | 14057 | debug4h(printf("End 8: short-overlap antiacceptor_minus: #%d:%u (%d mismatches) => searching left\n", |
14875 | 14058 | Substring_chrnum(acceptor),(Chrpos_T) (rightbound+1-chroffset),Substring_nmismatches_whole(acceptor))); |
14876 | 14059 | |
14877 | if ((i = Substring_splicesites_knowni(acceptor)) >= 0) { | |
14060 | if ((i = Substring_splicesitesA_knowni(acceptor)) >= 0) { | |
14878 | 14061 | origleft = Substring_genomicend(acceptor); |
14879 | 14062 | if ((splicesites_i = |
14880 | 14063 | Splicetrie_find_left(&nmismatches_shortend,&nmismatches_list,i, |
14887 | 14070 | ambcoords = lookup_splicesites(&probs_list,splicesites_i,splicesites); |
14888 | 14071 | debug4h(amb_length = endlength /*- nmismatches_shortend*/); |
14889 | 14072 | debug4h(printf("End 8: short-overlap antiacceptor_minus: Successful ambiguous from antiacceptor #%d with amb_length %d\n", |
14890 | Substring_splicesites_knowni(acceptor),amb_length)); | |
14073 | Substring_splicesitesA_knowni(acceptor),amb_length)); | |
14891 | 14074 | hits = List_push(hits,(void *) Stage3end_new_splice(&(*found_score),nmismatches_shortend,nmismatches, |
14892 | /*donor*/NULL,acceptor,Doublelist_max(probs_list),Substring_chimera_prob(acceptor),/*distance*/0U, | |
14075 | /*donor*/NULL,acceptor,Doublelist_max(probs_list),Substring_siteA_prob(acceptor),/*distance*/0U, | |
14893 | 14076 | /*shortdistancep*/false,/*penalty*/0,querylength, |
14894 | 14077 | ambcoords,/*ambcoords_acceptor*/NULL, |
14895 | 14078 | /*amb_knowni_donor*/splicesites_i,/*amb_knowni_acceptor*/NULL, |
14907 | 14090 | bestj = Intlist_head(splicesites_i); |
14908 | 14091 | bestleft = splicesites[bestj] - endlength; |
14909 | 14092 | if ((donor = Substring_new_donor(/*donor_coord*/splicesites[bestj],/*donor_knowni*/bestj, |
14910 | querylength-Substring_chimera_pos(acceptor), | |
14093 | querylength-Substring_siteA_pos(acceptor), | |
14911 | 14094 | /*substring_querystart*/0,/*substring_queryend*/querylength, |
14912 | 14095 | nmismatches_shortend,/*prob*/2.0,/*left*/bestleft,query_compress_rev, |
14913 | 14096 | querylength,/*plusp*/false,genestrand,/*sensedir*/SENSE_ANTI, |
14914 | 14097 | Substring_chrnum(acceptor),Substring_chroffset(acceptor), |
14915 | 14098 | Substring_chrhigh(acceptor),Substring_chrlength(acceptor))) != NULL) { |
14916 | 14099 | debug4h(printf("End 8: short-overlap antiacceptor_minus: Successful splice from antiacceptor #%d to antidonor #%d\n", |
14917 | Substring_splicesites_knowni(acceptor),Substring_splicesites_knowni(donor))); | |
14100 | Substring_splicesitesA_knowni(acceptor),Substring_splicesitesD_knowni(donor))); | |
14918 | 14101 | hits = List_push(hits,(void *) Stage3end_new_splice(&(*found_score),nmismatches_shortend,nmismatches, |
14919 | donor,acceptor,/*donor_prob*/2.0,Substring_chimera_prob(acceptor),/*distance*/origleft-bestleft, | |
14102 | donor,acceptor,/*donor_prob*/2.0,Substring_siteA_prob(acceptor),/*distance*/origleft-bestleft, | |
14920 | 14103 | /*shortdistancep*/true,localsplicing_penalty,querylength, |
14921 | 14104 | /*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL, |
14922 | 14105 | /*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL, |
15048 | 14231 | } else if (*any_omitted_p) { |
15049 | 14232 | floors = Floors_new_omitted(querylength,max_end_insertions,this->omitted); |
15050 | 14233 | *alloc_floors_p = true; |
15051 | } else if (querylength > MAX_READLENGTH) { | |
14234 | } else if (querylength > max_floors_readlength) { | |
15052 | 14235 | floors = Floors_new_standard(querylength,max_end_insertions,/*keep_floors_p*/false); |
15053 | 14236 | *alloc_floors_p = true; |
15054 | 14237 | } else if (keep_floors_p == false) { |
18310 | 17493 | /* Search 3: Subs/indels via complete set */ |
18311 | 17494 | |
18312 | 17495 | /* 4, 5. Complete set mismatches and indels, omitting frequent oligos */ |
18313 | completesetp = false; | |
18314 | for (q = subs; q != NULL; q = List_next(q)) { | |
18315 | hit = (Stage3end_T) List_head(q); | |
18316 | debug(printf("Hit has total score of %d\n",Stage3end_score(hit))); | |
18317 | if (Stage3end_score(hit) > done_level) { | |
18318 | completesetp = true; | |
17496 | if (subs == NULL) { | |
17497 | completesetp = true; | |
17498 | } else { | |
17499 | completesetp = false; | |
17500 | for (q = subs; q != NULL; q = List_next(q)) { | |
17501 | hit = (Stage3end_T) List_head(q); | |
17502 | debug(printf("Hit has total score of %d\n",Stage3end_score(hit))); | |
17503 | if (Stage3end_score(hit) > done_level) { | |
17504 | completesetp = true; | |
17505 | } | |
18319 | 17506 | } |
18320 | 17507 | } |
18321 | 17508 | debug(printf("completesetp %d\n",completesetp)); |
18462 | 17649 | } |
18463 | 17650 | #endif |
18464 | 17651 | |
18465 | if (knownsplicingp == true && done_level >= localsplicing_penalty) { | |
17652 | if (knownsplicingp == true && done_level >= localsplicing_penalty && | |
17653 | (max_splice_mismatches = done_level - localsplicing_penalty) >= 0) { | |
18466 | 17654 | /* Want >= and not > to give better results. Negligible effect on speed. */ |
18467 | 17655 | /* 8. Shortend splicing */ |
18468 | ||
18469 | max_splice_mismatches = done_level - localsplicing_penalty; | |
18470 | 17656 | debug(printf("*** Stage 8. Short-end splicing, allowing %d mismatches ***\n",max_splice_mismatches)); |
18471 | 17657 | |
18472 | 17658 | donors_plus = (List_T *) CALLOCA(max_splice_mismatches+1,sizeof(List_T)); |
18574 | 17760 | debug(printf("Skipping distant splicing because done_level %d < distantsplicing_penalty %d and min_trim %d < %d\n", |
18575 | 17761 | done_level,distantsplicing_penalty,min_trim,min_distantsplicing_end_matches)); |
18576 | 17762 | |
18577 | } else if (find_dna_chimeras_p == true) { | |
17763 | } else if (find_dna_chimeras_p == true && | |
17764 | (max_splice_mismatches = done_level - distantsplicing_penalty) >= 0) { | |
18578 | 17765 | /* 9 (DNA). Find distant splicing for DNA */ |
18579 | max_splice_mismatches = done_level - distantsplicing_penalty; | |
18580 | 17766 | debug(printf("*** Stage 9 (DNA). Distant splice ends, allowing %d mismatches ***\n",max_splice_mismatches)); |
18581 | 17767 | |
18582 | 17768 | startfrags_plus = (List_T *) CALLOCA(max_splice_mismatches+1,sizeof(List_T)); |
18617 | 17803 | debug(printf("*** Stage 9 (DNA). Distant splicing, allowing %d mismatches ***\n",nmismatches)); |
18618 | 17804 | |
18619 | 17805 | debug4e(printf("Sorting splice ends\n")); |
18620 | startfrags_plus[nmismatches] = Substring_sort_chimera_halves(startfrags_plus[nmismatches],/*ascendingp*/true); | |
18621 | endfrags_plus[nmismatches] = Substring_sort_chimera_halves(endfrags_plus[nmismatches],/*ascendingp*/true); | |
18622 | ||
18623 | startfrags_minus[nmismatches] = Substring_sort_chimera_halves(startfrags_minus[nmismatches],/*ascendingp*/false); | |
18624 | endfrags_minus[nmismatches] = Substring_sort_chimera_halves(endfrags_minus[nmismatches],/*ascendingp*/false); | |
17806 | startfrags_plus[nmismatches] = Substring_sort_siteN_halves(startfrags_plus[nmismatches],/*ascendingp*/true); | |
17807 | endfrags_plus[nmismatches] = Substring_sort_siteN_halves(endfrags_plus[nmismatches],/*ascendingp*/true); | |
17808 | ||
17809 | startfrags_minus[nmismatches] = Substring_sort_siteN_halves(startfrags_minus[nmismatches],/*ascendingp*/false); | |
17810 | endfrags_minus[nmismatches] = Substring_sort_siteN_halves(endfrags_minus[nmismatches],/*ascendingp*/false); | |
18625 | 17811 | |
18626 | 17812 | debug4e(printf("Splice ends at %d nmismatches: +startfrags/endfrags %d/%d, -startfrags/endfrags %d/%d\n", |
18627 | 17813 | nmismatches, |
18682 | 17868 | FREEA(startfrags_minus); |
18683 | 17869 | FREEA(endfrags_minus); |
18684 | 17870 | |
18685 | } else if (knownsplicingp || novelsplicingp) { | |
17871 | } else if ((knownsplicingp || novelsplicingp) && | |
17872 | (max_splice_mismatches = done_level - distantsplicing_penalty) >= 0) { | |
18686 | 17873 | /* 9 (RNA). Find distant splicing for RNA iteratively using both known and novel splice sites */ |
18687 | max_splice_mismatches = done_level - distantsplicing_penalty; | |
18688 | 17874 | debug(printf("*** Stage 9 (RNA). Distant splice ends, allowing %d mismatches ***\n",max_splice_mismatches)); |
18689 | 17875 | |
18690 | 17876 | donors_plus = (List_T *) CALLOCA(max_splice_mismatches+1,sizeof(List_T)); |
18733 | 17919 | debug(printf("*** Stage 9 (RNA). Distant splicing, allowing %d mismatches ***\n",nmismatches)); |
18734 | 17920 | |
18735 | 17921 | debug4e(printf("Sorting splice ends\n")); |
18736 | donors_plus[nmismatches] = Substring_sort_chimera_halves(donors_plus[nmismatches],/*ascendingp*/true); | |
18737 | acceptors_plus[nmismatches] = Substring_sort_chimera_halves(acceptors_plus[nmismatches],/*ascendingp*/true); | |
18738 | ||
18739 | antidonors_plus[nmismatches] = Substring_sort_chimera_halves(antidonors_plus[nmismatches],/*ascendingp*/false); | |
18740 | antiacceptors_plus[nmismatches] = Substring_sort_chimera_halves(antiacceptors_plus[nmismatches],/*ascendingp*/false); | |
18741 | ||
18742 | donors_minus[nmismatches] = Substring_sort_chimera_halves(donors_minus[nmismatches],/*ascendingp*/false); | |
18743 | acceptors_minus[nmismatches] = Substring_sort_chimera_halves(acceptors_minus[nmismatches],/*ascendingp*/false); | |
18744 | ||
18745 | antidonors_minus[nmismatches] = Substring_sort_chimera_halves(antidonors_minus[nmismatches],/*ascendingp*/true); | |
18746 | antiacceptors_minus[nmismatches] = Substring_sort_chimera_halves(antiacceptors_minus[nmismatches],/*ascendingp*/true); | |
17922 | donors_plus[nmismatches] = Substring_sort_siteD_halves(donors_plus[nmismatches],/*ascendingp*/true); | |
17923 | acceptors_plus[nmismatches] = Substring_sort_siteA_halves(acceptors_plus[nmismatches],/*ascendingp*/true); | |
17924 | ||
17925 | antidonors_plus[nmismatches] = Substring_sort_siteD_halves(antidonors_plus[nmismatches],/*ascendingp*/false); | |
17926 | antiacceptors_plus[nmismatches] = Substring_sort_siteA_halves(antiacceptors_plus[nmismatches],/*ascendingp*/false); | |
17927 | ||
17928 | donors_minus[nmismatches] = Substring_sort_siteD_halves(donors_minus[nmismatches],/*ascendingp*/false); | |
17929 | acceptors_minus[nmismatches] = Substring_sort_siteA_halves(acceptors_minus[nmismatches],/*ascendingp*/false); | |
17930 | ||
17931 | antidonors_minus[nmismatches] = Substring_sort_siteD_halves(antidonors_minus[nmismatches],/*ascendingp*/true); | |
17932 | antiacceptors_minus[nmismatches] = Substring_sort_siteA_halves(antiacceptors_minus[nmismatches],/*ascendingp*/true); | |
18747 | 17933 | |
18748 | 17934 | debug4e(printf("Splice ends at %d nmismatches: +donors/acceptors %d/%d, +antidonors/antiacceptors %d/%d, -donors/acceptors %d/%d, -antidonors/antiacceptors %d/%d\n", |
18749 | 17935 | nmismatches, |
18990 | 18176 | int querylength, query_lastpos, cutoff_level; |
18991 | 18177 | char *queryuc_ptr, *quality_string; |
18992 | 18178 | Compress_T query_compress_fwd = NULL, query_compress_rev = NULL; |
18179 | char *queryrc; | |
18180 | ||
18181 | querylength = Shortread_fulllength(queryseq); | |
18993 | 18182 | |
18994 | 18183 | #ifdef HAVE_ALLOCA |
18995 | char *queryrc; | |
18996 | #else | |
18997 | char queryrc[MAX_READLENGTH+1]; | |
18998 | #endif | |
18999 | ||
19000 | querylength = Shortread_fulllength(queryseq); | |
19001 | ||
19002 | #ifndef HAVE_ALLOCA | |
19003 | if (querylength > MAX_READLENGTH) { | |
19004 | fprintf(stderr,"Read %s has length %d > MAX_READLENGTH %d. Either run configure and make again with a higher value of MAX_READLENGTH, or consider using GMAP instead.\n", | |
19005 | Shortread_accession(queryseq),querylength,MAX_READLENGTH); | |
19006 | *npaths_primary = *npaths_altloc = 0; | |
19007 | return (Stage3end_T *) NULL; | |
19008 | } | |
18184 | if (querylength <= MAX_STACK_READLENGTH) { | |
18185 | queryrc = (char *) ALLOCA((querylength+1)*sizeof(int)); | |
18186 | } else { | |
18187 | queryrc = (char *) MALLOC((querylength+1)*sizeof(int)); | |
18188 | } | |
18189 | #else | |
18190 | queryrc = (char *) MALLOC((querylength+1)*sizeof(int)); | |
19009 | 18191 | #endif |
19010 | 18192 | |
19011 | 18193 | if (user_maxlevel_float < 0.0) { |
19033 | 18215 | |
19034 | 18216 | query_compress_fwd = Compress_new_fwd(queryuc_ptr,querylength); |
19035 | 18217 | query_compress_rev = Compress_new_rev(queryuc_ptr,querylength); |
19036 | #ifdef HAVE_ALLOCA | |
19037 | queryrc = (char *) ALLOCA((querylength+1)*sizeof(int)); | |
19038 | #endif | |
19039 | 18218 | make_complement_buffered(queryrc,queryuc_ptr,querylength); |
19040 | 18219 | |
19041 | 18220 | this = Stage1_new(querylength); |
19069 | 18248 | Compress_free(&query_compress_fwd); |
19070 | 18249 | Compress_free(&query_compress_rev); |
19071 | 18250 | Stage1_free(&this,querylength); |
18251 | ||
18252 | #ifdef HAVE_ALLOCA | |
18253 | if (querylength <= MAX_STACK_READLENGTH) { | |
18254 | FREEA(queryrc); | |
18255 | } else { | |
18256 | FREE(queryrc); | |
18257 | } | |
18258 | #else | |
18259 | FREE(queryrc); | |
18260 | #endif | |
18261 | ||
19072 | 18262 | return stage3array; |
19073 | 18263 | } |
19074 | 18264 | |
19094 | 18284 | char *queryuc_ptr, *quality_string; |
19095 | 18285 | Compress_T query_compress_fwd = NULL, query_compress_rev = NULL; |
19096 | 18286 | bool allvalidp; |
18287 | char *queryrc; | |
18288 | ||
18289 | querylength = Shortread_fulllength(queryseq); | |
19097 | 18290 | |
19098 | 18291 | #ifdef HAVE_ALLOCA |
19099 | char *queryrc; | |
19100 | #else | |
19101 | char queryrc[MAX_READLENGTH+1]; | |
19102 | #endif | |
19103 | ||
19104 | querylength = Shortread_fulllength(queryseq); | |
19105 | ||
19106 | #ifndef HAVE_ALLOCA | |
19107 | if (querylength > MAX_READLENGTH) { | |
19108 | fprintf(stderr,"Read %s has length %d > MAX_READLENGTH %d. Either run configure and make again with a higher value of MAX_READLENGTH, or consider using GMAP instead.\n", | |
19109 | Shortread_accession(queryseq),querylength,MAX_READLENGTH); | |
19110 | *npaths_primary = *npaths_altloc = 0; | |
19111 | return (Stage3end_T *) NULL; | |
19112 | } | |
18292 | if (querylength <= MAX_STACK_READLENGTH) { | |
18293 | queryrc = (char *) ALLOCA((querylength+1)*sizeof(int)); | |
18294 | } else { | |
18295 | queryrc = (char *) MALLOC((querylength+1)*sizeof(int)); | |
18296 | } | |
18297 | #else | |
18298 | queryrc = (char *) MALLOC((querylength+1)*sizeof(int)); | |
19113 | 18299 | #endif |
19114 | 18300 | |
19115 | 18301 | if (user_maxlevel_float < 0.0) { |
19143 | 18329 | query_compress_fwd = Compress_new_fwd(queryuc_ptr,querylength); |
19144 | 18330 | query_compress_rev = Compress_new_rev(queryuc_ptr,querylength); |
19145 | 18331 | gmap_history = History_new(); |
19146 | #ifdef HAVE_ALLOCA | |
19147 | queryrc = (char *) ALLOCA((querylength+1)*sizeof(char)); | |
19148 | #endif | |
19149 | 18332 | make_complement_buffered(queryrc,queryuc_ptr,querylength); |
19150 | 18333 | |
19151 | 18334 | if (read_oligos(&allvalidp,this_geneplus,queryuc_ptr,querylength,query_lastpos,/*genestrand*/+1) > 0) { |
19198 | 18381 | Compress_free(&query_compress_rev); |
19199 | 18382 | Stage1_free(&this_geneminus,querylength); |
19200 | 18383 | Stage1_free(&this_geneplus,querylength); |
18384 | ||
18385 | #ifdef HAVE_ALLOCA | |
18386 | if (querylength <= MAX_STACK_READLENGTH) { | |
18387 | FREEA(queryrc); | |
18388 | } else { | |
18389 | FREE(queryrc); | |
18390 | } | |
18391 | #else | |
18392 | FREE(queryrc); | |
18393 | #endif | |
18394 | ||
19201 | 18395 | return stage3array; |
19202 | 18396 | } |
19203 | 18397 | |
21003 | 20197 | } |
21004 | 20198 | debug(printf("Test for completeset using better_free_end_exists_p: completeset5p %d, completeset3p %d\n",completeset5p,completeset3p)); |
21005 | 20199 | #endif |
20200 | ||
20201 | #if 0 | |
20202 | } else { | |
20203 | /* This causes very slow running time */ | |
20204 | if (subs5 == NULL) { | |
20205 | completeset5p = true; | |
20206 | } | |
20207 | if (subs3 == NULL) { | |
20208 | completeset3p = true; | |
20209 | } | |
20210 | #endif | |
21006 | 20211 | } |
21007 | 20212 | |
21008 | 20213 | if (querylength5 < min_kmer_readlength) { |
21540 | 20745 | nmismatches,max_splice_mismatches_5)); |
21541 | 20746 | |
21542 | 20747 | debug4e(printf("Sorting splice ends\n")); |
21543 | donors_plus_5[nmismatches] = Substring_sort_chimera_halves(donors_plus_5[nmismatches],/*ascendingp*/true); | |
21544 | acceptors_plus_5[nmismatches] = Substring_sort_chimera_halves(acceptors_plus_5[nmismatches],/*ascendingp*/true); | |
21545 | ||
21546 | antidonors_plus_5[nmismatches] = Substring_sort_chimera_halves(antidonors_plus_5[nmismatches],/*ascendingp*/false); | |
21547 | antiacceptors_plus_5[nmismatches] = Substring_sort_chimera_halves(antiacceptors_plus_5[nmismatches],/*ascendingp*/false); | |
21548 | ||
21549 | donors_minus_5[nmismatches] = Substring_sort_chimera_halves(donors_minus_5[nmismatches],/*ascendingp*/false); | |
21550 | acceptors_minus_5[nmismatches] = Substring_sort_chimera_halves(acceptors_minus_5[nmismatches],/*ascendingp*/false); | |
21551 | ||
21552 | antidonors_minus_5[nmismatches] = Substring_sort_chimera_halves(antidonors_minus_5[nmismatches],/*ascendingp*/true); | |
21553 | antiacceptors_minus_5[nmismatches] = Substring_sort_chimera_halves(antiacceptors_minus_5[nmismatches],/*ascendingp*/true); | |
20748 | donors_plus_5[nmismatches] = Substring_sort_siteD_halves(donors_plus_5[nmismatches],/*ascendingp*/true); | |
20749 | acceptors_plus_5[nmismatches] = Substring_sort_siteA_halves(acceptors_plus_5[nmismatches],/*ascendingp*/true); | |
20750 | ||
20751 | antidonors_plus_5[nmismatches] = Substring_sort_siteD_halves(antidonors_plus_5[nmismatches],/*ascendingp*/false); | |
20752 | antiacceptors_plus_5[nmismatches] = Substring_sort_siteA_halves(antiacceptors_plus_5[nmismatches],/*ascendingp*/false); | |
20753 | ||
20754 | donors_minus_5[nmismatches] = Substring_sort_siteD_halves(donors_minus_5[nmismatches],/*ascendingp*/false); | |
20755 | acceptors_minus_5[nmismatches] = Substring_sort_siteA_halves(acceptors_minus_5[nmismatches],/*ascendingp*/false); | |
20756 | ||
20757 | antidonors_minus_5[nmismatches] = Substring_sort_siteD_halves(antidonors_minus_5[nmismatches],/*ascendingp*/true); | |
20758 | antiacceptors_minus_5[nmismatches] = Substring_sort_siteA_halves(antiacceptors_minus_5[nmismatches],/*ascendingp*/true); | |
21554 | 20759 | |
21555 | 20760 | debug4e(printf("Splice ends at %d nmismatches: +donors/acceptors %d/%d, +antidonors/antiacceptors %d/%d, -donors/acceptors %d/%d, -antidonors/antiacceptors %d/%d\n", |
21556 | 20761 | nmismatches, |
21641 | 20846 | nmismatches,max_splice_mismatches_3)); |
21642 | 20847 | |
21643 | 20848 | debug4e(printf("Sorting splice ends\n")); |
21644 | donors_plus_3[nmismatches] = Substring_sort_chimera_halves(donors_plus_3[nmismatches],/*ascendingp*/true); | |
21645 | acceptors_plus_3[nmismatches] = Substring_sort_chimera_halves(acceptors_plus_3[nmismatches],/*ascendingp*/true); | |
21646 | ||
21647 | antidonors_plus_3[nmismatches] = Substring_sort_chimera_halves(antidonors_plus_3[nmismatches],/*ascendingp*/false); | |
21648 | antiacceptors_plus_3[nmismatches] = Substring_sort_chimera_halves(antiacceptors_plus_3[nmismatches],/*ascendingp*/false); | |
21649 | ||
21650 | donors_minus_3[nmismatches] = Substring_sort_chimera_halves(donors_minus_3[nmismatches],/*ascendingp*/false); | |
21651 | acceptors_minus_3[nmismatches] = Substring_sort_chimera_halves(acceptors_minus_3[nmismatches],/*ascendingp*/false); | |
21652 | ||
21653 | antidonors_minus_3[nmismatches] = Substring_sort_chimera_halves(antidonors_minus_3[nmismatches],/*ascendingp*/true); | |
21654 | antiacceptors_minus_3[nmismatches] = Substring_sort_chimera_halves(antiacceptors_minus_3[nmismatches],/*ascendingp*/true); | |
20849 | donors_plus_3[nmismatches] = Substring_sort_siteD_halves(donors_plus_3[nmismatches],/*ascendingp*/true); | |
20850 | acceptors_plus_3[nmismatches] = Substring_sort_siteA_halves(acceptors_plus_3[nmismatches],/*ascendingp*/true); | |
20851 | ||
20852 | antidonors_plus_3[nmismatches] = Substring_sort_siteD_halves(antidonors_plus_3[nmismatches],/*ascendingp*/false); | |
20853 | antiacceptors_plus_3[nmismatches] = Substring_sort_siteA_halves(antiacceptors_plus_3[nmismatches],/*ascendingp*/false); | |
20854 | ||
20855 | donors_minus_3[nmismatches] = Substring_sort_siteD_halves(donors_minus_3[nmismatches],/*ascendingp*/false); | |
20856 | acceptors_minus_3[nmismatches] = Substring_sort_siteA_halves(acceptors_minus_3[nmismatches],/*ascendingp*/false); | |
20857 | ||
20858 | antidonors_minus_3[nmismatches] = Substring_sort_siteD_halves(antidonors_minus_3[nmismatches],/*ascendingp*/true); | |
20859 | antiacceptors_minus_3[nmismatches] = Substring_sort_siteA_halves(antiacceptors_minus_3[nmismatches],/*ascendingp*/true); | |
21655 | 20860 | |
21656 | 20861 | debug4e(printf("Splice ends at %d nmismatches: +donors/acceptors %d/%d, +antidonors/antiacceptors %d/%d, -donors/acceptors %d/%d, -antidonors/antiacceptors %d/%d\n", |
21657 | 20862 | nmismatches, |
23152 | 22357 | int maxpairedpaths = maxpaths_search; /* 100000 */ |
23153 | 22358 | #endif |
23154 | 22359 | bool abort_pairing_p; |
23155 | ||
23156 | #ifdef HAVE_ALLOCA | |
23157 | 22360 | char *queryrc5, *queryrc3; |
23158 | #else | |
23159 | char queryrc5[MAX_READLENGTH+1], queryrc3[MAX_READLENGTH+1]; | |
23160 | #endif | |
23161 | ||
23162 | 22361 | |
23163 | 22362 | querylength5 = Shortread_fulllength(queryseq5); |
23164 | 22363 | querylength3 = Shortread_fulllength(queryseq3); |
23165 | 22364 | |
23166 | #ifndef HAVE_ALLOCA | |
23167 | if (querylength5 > MAX_READLENGTH || querylength3 > MAX_READLENGTH) { | |
23168 | fprintf(stderr,"Paired-read %s has lengths %d and %d > MAX_READLENGTH %d. Either run configure and make again with a higher value of MAX_READLENGTH, or consider using GMAP instead.\n", | |
23169 | Shortread_accession(queryseq5),querylength5,querylength3,MAX_READLENGTH); | |
23170 | *npaths_primary = *npaths_altloc = 0; | |
23171 | *nhits5_primary = *nhits5_altloc = 0; | |
23172 | *nhits3_primary = *nhits3_altloc = 0; | |
23173 | *stage3array5 = *stage3array3 = (Stage3end_T *) NULL; | |
23174 | return (Stage3pair_T *) NULL; | |
23175 | } | |
23176 | #else | |
23177 | queryrc5 = (char *) ALLOCA((querylength5+1)*sizeof(char)); | |
23178 | queryrc3 = (char *) ALLOCA((querylength3+1)*sizeof(char)); | |
22365 | #ifdef HAVE_ALLOCA | |
22366 | if (querylength5 <= MAX_STACK_READLENGTH) { | |
22367 | queryrc5 = (char *) ALLOCA((querylength5+1)*sizeof(char)); | |
22368 | } else { | |
22369 | queryrc5 = (char *) MALLOC((querylength5+1)*sizeof(char)); | |
22370 | } | |
22371 | if (querylength3 <= MAX_STACK_READLENGTH) { | |
22372 | queryrc3 = (char *) ALLOCA((querylength3+1)*sizeof(char)); | |
22373 | } else { | |
22374 | queryrc3 = (char *) MALLOC((querylength3+1)*sizeof(char)); | |
22375 | } | |
22376 | #else | |
22377 | queryrc5 = (char *) MALLOC((querylength5+1)*sizeof(char)); | |
22378 | queryrc3 = (char *) MALLOC((querylength3+1)*sizeof(char)); | |
23179 | 22379 | #endif |
23180 | 22380 | |
23181 | 22381 | if (user_maxlevel_float < 0.0) { |
23273 | 22473 | Compress_free(&query3_compress_rev); |
23274 | 22474 | Stage1_free(&this5,querylength5); |
23275 | 22475 | Stage1_free(&this3,querylength3); |
23276 | return (Stage3pair_T *) NULL; | |
22476 | ||
22477 | stage3pairarray = (Stage3pair_T *) NULL; | |
23277 | 22478 | |
23278 | 22479 | } else { |
23279 | 22480 | stage3pairarray = |
23295 | 22496 | Compress_free(&query3_compress_rev); |
23296 | 22497 | Stage1_free(&this5,querylength5); |
23297 | 22498 | Stage1_free(&this3,querylength3); |
23298 | return stage3pairarray; | |
23299 | } | |
22499 | } | |
22500 | ||
22501 | #ifdef HAVE_ALLOCA | |
22502 | if (querylength5 <= MAX_STACK_READLENGTH) { | |
22503 | FREEA(queryrc5); | |
22504 | } else { | |
22505 | FREE(queryrc5); | |
22506 | } | |
22507 | if (querylength3 <= MAX_STACK_READLENGTH) { | |
22508 | FREEA(queryrc3); | |
22509 | } else { | |
22510 | FREE(queryrc3); | |
22511 | } | |
22512 | #else | |
22513 | FREE(queryrc5); | |
22514 | FREE(queryrc3); | |
22515 | #endif | |
22516 | ||
22517 | return stage3pairarray; | |
23300 | 22518 | } |
23301 | 22519 | |
23302 | 22520 | |
23334 | 22552 | int maxpairedpaths = maxpaths_search; /* 100000 */ |
23335 | 22553 | #endif |
23336 | 22554 | bool abort_pairing_p_geneplus, abort_pairing_p_geneminus; |
23337 | ||
23338 | #ifdef HAVE_ALLOCA | |
23339 | 22555 | char *queryrc5, *queryrc3; |
23340 | #else | |
23341 | char queryrc5[MAX_READLENGTH+1], queryrc3[MAX_READLENGTH+1]; | |
23342 | #endif | |
23343 | 22556 | |
23344 | 22557 | |
23345 | 22558 | querylength5 = Shortread_fulllength(queryseq5); |
23346 | 22559 | querylength3 = Shortread_fulllength(queryseq3); |
23347 | 22560 | |
23348 | #ifndef HAVE_ALLOCA | |
23349 | if (querylength5 > MAX_READLENGTH || querylength3 > MAX_READLENGTH) { | |
23350 | fprintf(stderr,"Paired-read %s has lengths %d and %d > MAX_READLENGTH %d. Either run configure and make again with a higher value of MAX_READLENGTH, or consider using GMAP instead.\n", | |
23351 | Shortread_accession(queryseq5),querylength5,querylength3,MAX_READLENGTH); | |
23352 | *npaths_primary = *npaths_altloc = 0; | |
23353 | *nhits5_primary = *nhits5_altloc = 0; | |
23354 | *nhits3_primary = *nhits3_altloc = 0; | |
23355 | *stage3array5 = *stage3array3 = (Stage3end_T *) NULL; | |
23356 | return (Stage3pair_T *) NULL; | |
23357 | } | |
23358 | #else | |
23359 | queryrc5 = (char *) ALLOCA((querylength5+1)*sizeof(char)); | |
23360 | queryrc3 = (char *) ALLOCA((querylength3+1)*sizeof(char)); | |
22561 | #ifdef HAVE_ALLOCA | |
22562 | if (querylength5 <= MAX_STACK_READLENGTH) { | |
22563 | queryrc5 = (char *) ALLOCA((querylength5+1)*sizeof(char)); | |
22564 | } else { | |
22565 | queryrc5 = (char *) MALLOC((querylength5+1)*sizeof(char)); | |
22566 | } | |
22567 | if (querylength3 <= MAX_STACK_READLENGTH) { | |
22568 | queryrc3 = (char *) ALLOCA((querylength3+1)*sizeof(char)); | |
22569 | } else { | |
22570 | queryrc3 = (char *) MALLOC((querylength3+1)*sizeof(char)); | |
22571 | } | |
22572 | #else | |
22573 | queryrc5 = (char *) MALLOC((querylength5+1)*sizeof(char)); | |
22574 | queryrc3 = (char *) MALLOC((querylength3+1)*sizeof(char)); | |
23361 | 22575 | #endif |
23362 | 22576 | |
23363 | 22577 | if (user_maxlevel_float < 0.0) { |
23457 | 22671 | terminals_geneminus,hits_geneminus_5,hits_geneminus_3,querylength5,querylength3); |
23458 | 22672 | |
23459 | 22673 | if (abort_pairing_p_geneplus == true) { |
23460 | debug16(printf("abort_pairing_p_geneplus is true\n")); | |
23461 | paired_results_free(this_geneplus_5,this_geneplus_3,hitpairs_geneplus,samechr_geneplus,conc_transloc_geneplus, | |
23462 | terminals_geneplus,hits_geneplus_5,hits_geneplus_3,querylength5,querylength3); | |
23463 | ||
23464 | this_geneplus_5 = Stage1_new(querylength5); | |
23465 | this_geneplus_3 = Stage1_new(querylength3); | |
23466 | realign_separately(stage3array5,&(*nhits5_primary),&(*nhits5_altloc),&(*first_absmq5),&(*second_absmq5), | |
23467 | stage3array3,&(*nhits3_primary),&(*nhits3_altloc),&(*first_absmq3),&(*second_absmq3), | |
23468 | this_geneplus_5,this_geneplus_3, | |
23469 | query5_compress_fwd,query5_compress_rev,query3_compress_fwd,query3_compress_rev, | |
23470 | queryseq5,queryuc_ptr_5,queryrc5,quality_string_5,querylength5,query5_lastpos, | |
23471 | queryseq3,queryuc_ptr_3,queryrc3,quality_string_3,querylength3,query3_lastpos, | |
23472 | indexdb_fwd,indexdb_rev,indexdb_size_threshold,floors_array, | |
23473 | user_maxlevel_5,user_maxlevel_3,min_coverage_5,min_coverage_3, | |
23474 | indel_penalty_middle,indel_penalty_end, | |
23475 | allow_end_indels_p,max_end_insertions,max_end_deletions,min_indel_end_matches, | |
23476 | localsplicing_penalty,distantsplicing_penalty,min_shortend, | |
23477 | oligoindices_minor,pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR, | |
23478 | keep_floors_p,/*genestrand*/+1); | |
23479 | ||
23480 | *npaths_primary = *npaths_altloc = 0; | |
23481 | *final_pairtype = UNPAIRED; | |
23482 | History_free(&gmap_history_3); | |
23483 | History_free(&gmap_history_5); | |
23484 | Compress_free(&query5_compress_fwd); | |
23485 | Compress_free(&query5_compress_rev); | |
23486 | Compress_free(&query3_compress_fwd); | |
23487 | Compress_free(&query3_compress_rev); | |
23488 | Stage1_free(&this_geneplus_5,querylength5); | |
23489 | Stage1_free(&this_geneplus_3,querylength3); | |
23490 | return (Stage3pair_T *) NULL; | |
23491 | ||
23492 | } else { | |
23493 | stage3pairarray = | |
23494 | consolidate_paired_results(&(*npaths_primary),&(*npaths_altloc),&(*first_absmq),&(*second_absmq),&(*final_pairtype), | |
23495 | &(*stage3array5),&(*nhits5_primary),&(*nhits5_altloc),&(*first_absmq5),&(*second_absmq5), | |
23496 | &(*stage3array3),&(*nhits3_primary),&(*nhits3_altloc),&(*first_absmq3),&(*second_absmq3), | |
23497 | hitpairs_geneplus,samechr_geneplus,conc_transloc_geneplus,terminals_geneplus, | |
23498 | hits_geneplus_5,hits_geneplus_3, | |
23499 | query5_compress_fwd,query5_compress_rev,query3_compress_fwd,query3_compress_rev, | |
23500 | queryseq5,queryuc_ptr_5,quality_string_5,querylength5, | |
23501 | queryseq3,queryuc_ptr_3,quality_string_3,querylength3, | |
23502 | cutoff_level_5,cutoff_level_3,min_coverage_5,min_coverage_3, | |
23503 | oligoindices_minor,pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR); | |
23504 | History_free(&gmap_history_3); | |
23505 | History_free(&gmap_history_5); | |
23506 | Compress_free(&query5_compress_fwd); | |
23507 | Compress_free(&query5_compress_rev); | |
23508 | Compress_free(&query3_compress_fwd); | |
23509 | Compress_free(&query3_compress_rev); | |
23510 | Stage1_free(&this_geneplus_5,querylength5); | |
23511 | Stage1_free(&this_geneplus_3,querylength3); | |
23512 | return stage3pairarray; | |
23513 | } | |
22674 | debug16(printf("abort_pairing_p_geneplus is true\n")); | |
22675 | paired_results_free(this_geneplus_5,this_geneplus_3,hitpairs_geneplus,samechr_geneplus,conc_transloc_geneplus, | |
22676 | terminals_geneplus,hits_geneplus_5,hits_geneplus_3,querylength5,querylength3); | |
22677 | ||
22678 | this_geneplus_5 = Stage1_new(querylength5); | |
22679 | this_geneplus_3 = Stage1_new(querylength3); | |
22680 | realign_separately(stage3array5,&(*nhits5_primary),&(*nhits5_altloc),&(*first_absmq5),&(*second_absmq5), | |
22681 | stage3array3,&(*nhits3_primary),&(*nhits3_altloc),&(*first_absmq3),&(*second_absmq3), | |
22682 | this_geneplus_5,this_geneplus_3, | |
22683 | query5_compress_fwd,query5_compress_rev,query3_compress_fwd,query3_compress_rev, | |
22684 | queryseq5,queryuc_ptr_5,queryrc5,quality_string_5,querylength5,query5_lastpos, | |
22685 | queryseq3,queryuc_ptr_3,queryrc3,quality_string_3,querylength3,query3_lastpos, | |
22686 | indexdb_fwd,indexdb_rev,indexdb_size_threshold,floors_array, | |
22687 | user_maxlevel_5,user_maxlevel_3,min_coverage_5,min_coverage_3, | |
22688 | indel_penalty_middle,indel_penalty_end, | |
22689 | allow_end_indels_p,max_end_insertions,max_end_deletions,min_indel_end_matches, | |
22690 | localsplicing_penalty,distantsplicing_penalty,min_shortend, | |
22691 | oligoindices_minor,pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR, | |
22692 | keep_floors_p,/*genestrand*/+1); | |
22693 | ||
22694 | *npaths_primary = *npaths_altloc = 0; | |
22695 | *final_pairtype = UNPAIRED; | |
22696 | History_free(&gmap_history_3); | |
22697 | History_free(&gmap_history_5); | |
22698 | Compress_free(&query5_compress_fwd); | |
22699 | Compress_free(&query5_compress_rev); | |
22700 | Compress_free(&query3_compress_fwd); | |
22701 | Compress_free(&query3_compress_rev); | |
22702 | Stage1_free(&this_geneplus_5,querylength5); | |
22703 | Stage1_free(&this_geneplus_3,querylength3); | |
22704 | ||
22705 | stage3pairarray = (Stage3pair_T *) NULL; | |
22706 | ||
22707 | } else { | |
22708 | stage3pairarray = | |
22709 | consolidate_paired_results(&(*npaths_primary),&(*npaths_altloc),&(*first_absmq),&(*second_absmq),&(*final_pairtype), | |
22710 | &(*stage3array5),&(*nhits5_primary),&(*nhits5_altloc),&(*first_absmq5),&(*second_absmq5), | |
22711 | &(*stage3array3),&(*nhits3_primary),&(*nhits3_altloc),&(*first_absmq3),&(*second_absmq3), | |
22712 | hitpairs_geneplus,samechr_geneplus,conc_transloc_geneplus,terminals_geneplus, | |
22713 | hits_geneplus_5,hits_geneplus_3, | |
22714 | query5_compress_fwd,query5_compress_rev,query3_compress_fwd,query3_compress_rev, | |
22715 | queryseq5,queryuc_ptr_5,quality_string_5,querylength5, | |
22716 | queryseq3,queryuc_ptr_3,quality_string_3,querylength3, | |
22717 | cutoff_level_5,cutoff_level_3,min_coverage_5,min_coverage_3, | |
22718 | oligoindices_minor,pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR); | |
22719 | History_free(&gmap_history_3); | |
22720 | History_free(&gmap_history_5); | |
22721 | Compress_free(&query5_compress_fwd); | |
22722 | Compress_free(&query5_compress_rev); | |
22723 | Compress_free(&query3_compress_fwd); | |
22724 | Compress_free(&query3_compress_rev); | |
22725 | Stage1_free(&this_geneplus_5,querylength5); | |
22726 | Stage1_free(&this_geneplus_3,querylength3); | |
22727 | /* return stage3pairarray; */ | |
22728 | } | |
23514 | 22729 | |
23515 | 22730 | } else if (found_score_geneminus < found_score_geneplus) { |
23516 | 22731 | paired_results_free(this_geneplus_5,this_geneplus_3,hitpairs_geneplus,samechr_geneplus,conc_transloc_geneplus, |
23517 | 22732 | terminals_geneplus,hits_geneplus_5,hits_geneplus_3,querylength5,querylength3); |
23518 | 22733 | |
23519 | 22734 | if (abort_pairing_p_geneminus == true) { |
23520 | debug16(printf("abort_pairing_p_geneminus is true\n")); | |
23521 | paired_results_free(this_geneminus_5,this_geneminus_3,hitpairs_geneminus,samechr_geneminus,conc_transloc_geneminus, | |
23522 | terminals_geneminus,hits_geneminus_5,hits_geneminus_3,querylength5,querylength3); | |
23523 | ||
23524 | this_geneminus_5 = Stage1_new(querylength5); | |
23525 | this_geneminus_3 = Stage1_new(querylength3); | |
23526 | realign_separately(stage3array5,&(*nhits5_primary),&(*nhits5_altloc),&(*first_absmq5),&(*second_absmq5), | |
23527 | stage3array3,&(*nhits3_primary),&(*nhits3_altloc),&(*first_absmq3),&(*second_absmq3), | |
23528 | this_geneminus_5,this_geneminus_3, | |
23529 | query5_compress_fwd,query5_compress_rev,query3_compress_fwd,query3_compress_rev, | |
23530 | queryseq5,queryuc_ptr_5,queryrc5,quality_string_5,querylength5,query5_lastpos, | |
23531 | queryseq3,queryuc_ptr_3,queryrc3,quality_string_3,querylength3,query3_lastpos, | |
23532 | indexdb_fwd,indexdb_rev,indexdb_size_threshold,floors_array, | |
23533 | user_maxlevel_5,user_maxlevel_3,min_coverage_5,min_coverage_3, | |
23534 | indel_penalty_middle,indel_penalty_end, | |
23535 | allow_end_indels_p,max_end_insertions,max_end_deletions,min_indel_end_matches, | |
23536 | localsplicing_penalty,distantsplicing_penalty,min_shortend, | |
23537 | oligoindices_minor,pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR, | |
23538 | keep_floors_p,/*genestrand*/+2); | |
23539 | ||
23540 | *npaths_primary = *npaths_altloc = 0; | |
23541 | *final_pairtype = UNPAIRED; | |
23542 | History_free(&gmap_history_3); | |
23543 | History_free(&gmap_history_5); | |
23544 | Compress_free(&query5_compress_fwd); | |
23545 | Compress_free(&query5_compress_rev); | |
23546 | Compress_free(&query3_compress_fwd); | |
23547 | Compress_free(&query3_compress_rev); | |
23548 | Stage1_free(&this_geneminus_5,querylength5); | |
23549 | Stage1_free(&this_geneminus_3,querylength3); | |
23550 | return (Stage3pair_T *) NULL; | |
23551 | ||
23552 | } else { | |
23553 | stage3pairarray = | |
23554 | consolidate_paired_results(&(*npaths_primary),&(*npaths_altloc),&(*first_absmq),&(*second_absmq),&(*final_pairtype), | |
23555 | &(*stage3array5),&(*nhits5_primary),&(*nhits5_altloc),&(*first_absmq5),&(*second_absmq5), | |
23556 | &(*stage3array3),&(*nhits3_primary),&(*nhits3_altloc),&(*first_absmq3),&(*second_absmq3), | |
23557 | hitpairs_geneminus,samechr_geneminus,conc_transloc_geneminus,terminals_geneminus, | |
23558 | hits_geneminus_5,hits_geneminus_3, | |
23559 | query5_compress_fwd,query5_compress_rev,query3_compress_fwd,query3_compress_rev, | |
23560 | queryseq5,queryuc_ptr_5,quality_string_5,querylength5, | |
23561 | queryseq3,queryuc_ptr_3,quality_string_3,querylength3, | |
23562 | cutoff_level_5,cutoff_level_3,min_coverage_5,min_coverage_3, | |
23563 | oligoindices_minor,pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR); | |
23564 | History_free(&gmap_history_3); | |
23565 | History_free(&gmap_history_5); | |
23566 | Compress_free(&query5_compress_fwd); | |
23567 | Compress_free(&query5_compress_rev); | |
23568 | Compress_free(&query3_compress_fwd); | |
23569 | Compress_free(&query3_compress_rev); | |
23570 | Stage1_free(&this_geneminus_5,querylength5); | |
23571 | Stage1_free(&this_geneminus_3,querylength3); | |
23572 | return stage3pairarray; | |
23573 | } | |
22735 | debug16(printf("abort_pairing_p_geneminus is true\n")); | |
22736 | paired_results_free(this_geneminus_5,this_geneminus_3,hitpairs_geneminus,samechr_geneminus,conc_transloc_geneminus, | |
22737 | terminals_geneminus,hits_geneminus_5,hits_geneminus_3,querylength5,querylength3); | |
22738 | ||
22739 | this_geneminus_5 = Stage1_new(querylength5); | |
22740 | this_geneminus_3 = Stage1_new(querylength3); | |
22741 | realign_separately(stage3array5,&(*nhits5_primary),&(*nhits5_altloc),&(*first_absmq5),&(*second_absmq5), | |
22742 | stage3array3,&(*nhits3_primary),&(*nhits3_altloc),&(*first_absmq3),&(*second_absmq3), | |
22743 | this_geneminus_5,this_geneminus_3, | |
22744 | query5_compress_fwd,query5_compress_rev,query3_compress_fwd,query3_compress_rev, | |
22745 | queryseq5,queryuc_ptr_5,queryrc5,quality_string_5,querylength5,query5_lastpos, | |
22746 | queryseq3,queryuc_ptr_3,queryrc3,quality_string_3,querylength3,query3_lastpos, | |
22747 | indexdb_fwd,indexdb_rev,indexdb_size_threshold,floors_array, | |
22748 | user_maxlevel_5,user_maxlevel_3,min_coverage_5,min_coverage_3, | |
22749 | indel_penalty_middle,indel_penalty_end, | |
22750 | allow_end_indels_p,max_end_insertions,max_end_deletions,min_indel_end_matches, | |
22751 | localsplicing_penalty,distantsplicing_penalty,min_shortend, | |
22752 | oligoindices_minor,pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR, | |
22753 | keep_floors_p,/*genestrand*/+2); | |
22754 | ||
22755 | *npaths_primary = *npaths_altloc = 0; | |
22756 | *final_pairtype = UNPAIRED; | |
22757 | History_free(&gmap_history_3); | |
22758 | History_free(&gmap_history_5); | |
22759 | Compress_free(&query5_compress_fwd); | |
22760 | Compress_free(&query5_compress_rev); | |
22761 | Compress_free(&query3_compress_fwd); | |
22762 | Compress_free(&query3_compress_rev); | |
22763 | Stage1_free(&this_geneminus_5,querylength5); | |
22764 | Stage1_free(&this_geneminus_3,querylength3); | |
22765 | ||
22766 | stage3pairarray = (Stage3pair_T *) NULL; | |
22767 | ||
22768 | } else { | |
22769 | stage3pairarray = | |
22770 | consolidate_paired_results(&(*npaths_primary),&(*npaths_altloc),&(*first_absmq),&(*second_absmq),&(*final_pairtype), | |
22771 | &(*stage3array5),&(*nhits5_primary),&(*nhits5_altloc),&(*first_absmq5),&(*second_absmq5), | |
22772 | &(*stage3array3),&(*nhits3_primary),&(*nhits3_altloc),&(*first_absmq3),&(*second_absmq3), | |
22773 | hitpairs_geneminus,samechr_geneminus,conc_transloc_geneminus,terminals_geneminus, | |
22774 | hits_geneminus_5,hits_geneminus_3, | |
22775 | query5_compress_fwd,query5_compress_rev,query3_compress_fwd,query3_compress_rev, | |
22776 | queryseq5,queryuc_ptr_5,quality_string_5,querylength5, | |
22777 | queryseq3,queryuc_ptr_3,quality_string_3,querylength3, | |
22778 | cutoff_level_5,cutoff_level_3,min_coverage_5,min_coverage_3, | |
22779 | oligoindices_minor,pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR); | |
22780 | History_free(&gmap_history_3); | |
22781 | History_free(&gmap_history_5); | |
22782 | Compress_free(&query5_compress_fwd); | |
22783 | Compress_free(&query5_compress_rev); | |
22784 | Compress_free(&query3_compress_fwd); | |
22785 | Compress_free(&query3_compress_rev); | |
22786 | Stage1_free(&this_geneminus_5,querylength5); | |
22787 | Stage1_free(&this_geneminus_3,querylength3); | |
22788 | /* return stage3pairarray; */ | |
22789 | } | |
23574 | 22790 | |
23575 | 22791 | } else { |
23576 | 22792 | hitpairs = List_append(hitpairs_geneplus,hitpairs_geneminus); |
23600 | 22816 | Stage1_free(&this_geneminus_3,querylength3); |
23601 | 22817 | Stage1_free(&this_geneplus_5,querylength5); |
23602 | 22818 | Stage1_free(&this_geneplus_3,querylength3); |
23603 | return stage3pairarray; | |
23604 | } | |
22819 | /* return stage3pairarray */ | |
22820 | } | |
22821 | ||
22822 | #ifdef HAVE_ALLOCA | |
22823 | if (querylength5 <= MAX_STACK_READLENGTH) { | |
22824 | FREEA(queryrc5); | |
22825 | } else { | |
22826 | FREE(queryrc5); | |
22827 | } | |
22828 | if (querylength3 <= MAX_STACK_READLENGTH) { | |
22829 | FREEA(queryrc3); | |
22830 | } else { | |
22831 | FREE(queryrc3); | |
22832 | } | |
22833 | #else | |
22834 | FREE(queryrc5); | |
22835 | FREE(queryrc3); | |
22836 | #endif | |
22837 | ||
22838 | return stage3pairarray; | |
23605 | 22839 | } |
23606 | 22840 | |
23607 | 22841 | |
23677 | 22911 | int extramaterial_end_in, int extramaterial_paired_in, |
23678 | 22912 | int gmap_mode, int trigger_score_for_gmap_in, int gmap_allowance_in, |
23679 | 22913 | int max_gmap_pairsearch_in, int max_gmap_segments_in, |
23680 | int max_gmap_improvement_in, int antistranded_penalty_in) { | |
22914 | int max_gmap_improvement_in, int antistranded_penalty_in, | |
22915 | int max_floors_readlength_in) { | |
23681 | 22916 | bool gmapp = false; |
23682 | 22917 | |
23683 | 22918 | use_sarray_p = use_sarray_p_in; |
23818 | 23053 | snpp = false; |
23819 | 23054 | } |
23820 | 23055 | |
23056 | max_floors_readlength = max_floors_readlength_in; | |
23057 | ||
23821 | 23058 | return; |
23822 | 23059 | } |
0 | /* $Id: stage1hr.h 186091 2016-03-17 22:23:16Z twu $ */ | |
0 | /* $Id: stage1hr.h 196434 2016-08-16 20:21:03Z twu $ */ | |
1 | 1 | #ifndef STAGE1HR_INCLUDED |
2 | 2 | #define STAGE1HR_INCLUDED |
3 | 3 | |
106 | 106 | int extramaterial_end_in, int extramaterial_paired_in, |
107 | 107 | int gmap_mode, int trigger_score_for_gmap_in, int gmap_allowance_in, |
108 | 108 | int max_gmap_pairsearch_in, int max_gmap_terminal_in, |
109 | int max_gmap_improvement_in, int antistranded_penalty_in); | |
109 | int max_gmap_improvement_in, int antistranded_penalty_in, | |
110 | int max_floors_readlength_in); | |
110 | 111 | |
111 | 112 | |
112 | 113 | #undef T |
0 | static char rcsid[] = "$Id: stage3.c 195963 2016-08-08 16:38:05Z twu $"; | |
0 | static char rcsid[] = "$Id: stage3.c 196409 2016-08-16 15:42:27Z twu $"; | |
1 | 1 | #ifdef HAVE_CONFIG_H |
2 | 2 | #include <config.h> |
3 | 3 | #endif |
3622 | 3622 | debug3(Pair_dump_list(exon,true)); |
3623 | 3623 | |
3624 | 3624 | |
3625 | if (exon == NULL) { | |
3626 | *trim5p = false; | |
3627 | return pairs; | |
3628 | } | |
3629 | ||
3625 | 3630 | max_nmatches = max_nmismatches = 0; |
3626 | 3631 | nmatches = nmismatches = 0; |
3627 | 3632 | max_score = score = 0; |
3909 | 3914 | debug3(printf("End exon:\n")); |
3910 | 3915 | debug3(Pair_dump_list(exon,true)); |
3911 | 3916 | |
3917 | ||
3918 | if (exon == NULL) { | |
3919 | *trim3p = false; | |
3920 | return path; | |
3921 | } | |
3912 | 3922 | |
3913 | 3923 | max_nmatches = max_nmismatches = 0; |
3914 | 3924 | nmatches = nmismatches = 0; |
12403 | 12413 | int sense_try, int sense_filter, |
12404 | 12414 | Oligoindex_array_T oligoindices_minor, Diagpool_T diagpool, Cellpool_T cellpool) { |
12405 | 12415 | struct Pair_T *pairarray1; |
12406 | List_T pairs_fwd_copy, pairs_rev_copy, p; | |
12416 | List_T p; | |
12407 | 12417 | Chrpos_T *last_genomedp5_fwd = NULL, *last_genomedp3_fwd = NULL, *last_genomedp5_rev = NULL, *last_genomedp3_rev = NULL; |
12408 | 12418 | List_T pairs_pretrim, pairs_fwd, pairs_rev, best_pairs, temp_pairs, path_fwd, path_rev, best_path, temp_path; |
12409 | 12419 | List_T copy; |
12419 | 12429 | int fwd_ambig_end_length_5 = 0, fwd_ambig_end_length_3 = 0, rev_ambig_end_length_5 = 0, rev_ambig_end_length_3 = 0, temp_ambig_end_length; |
12420 | 12430 | Splicetype_T fwd_ambig_splicetype_5, fwd_ambig_splicetype_3, rev_ambig_splicetype_5, rev_ambig_splicetype_3, temp_ambig_splicetype; |
12421 | 12431 | double fwd_ambig_prob_5, fwd_ambig_prob_3, rev_ambig_prob_5, rev_ambig_prob_3, temp_ambig_prob; |
12432 | #ifdef GSNAP | |
12433 | List_T pairs_fwd_copy, pairs_rev_copy; | |
12434 | #endif | |
12435 | ||
12436 | ||
12422 | 12437 | |
12423 | 12438 | #ifdef COMPLEX_DIRECTION |
12424 | 12439 | int indel_alignment_score_fwd, indel_alignment_score_rev; |
0 | static char rcsid[] = "$Id: stage3hr.c 195760 2016-08-04 00:12:04Z twu $"; | |
0 | static char rcsid[] = "$Id: stage3hr.c 196429 2016-08-16 20:09:56Z twu $"; | |
1 | 1 | #ifdef HAVE_CONFIG_H |
2 | 2 | #include <config.h> |
3 | 3 | #endif |
1781 | 1781 | Substring_T substring; |
1782 | 1782 | Junction_T junction; |
1783 | 1783 | |
1784 | debug0(printf("Freeing Stage3end %p of type %s\n",*old,hittype_string((*old)->hittype))); | |
1784 | #ifdef DEBUG0 | |
1785 | printf("Freeing Stage3end %p of type %s",*old,hittype_string((*old)->hittype)); | |
1786 | if ((*old)->hittype == SUBSTRINGS) { | |
1787 | if (Substring_list_ambiguous_p((*old)->substrings_1toN) == true) { | |
1788 | printf(" ambiguous"); | |
1789 | } else { | |
1790 | printf(" not ambiguous"); | |
1791 | } | |
1792 | } | |
1793 | printf("\n"); | |
1794 | #endif | |
1785 | 1795 | |
1786 | 1796 | #if 0 |
1787 | 1797 | FREE_OUT((*old)->ambcoords_donor); |
7862 | 7872 | new->genomicstart = Substring_genomicstart(acceptor); |
7863 | 7873 | new->genomicend = Substring_genomicend(acceptor); |
7864 | 7874 | |
7865 | donor = Substring_new_ambig(/*querystart*/0,/*queryend*/Substring_querystart(acceptor), | |
7866 | /*splice_pos*/Substring_querystart(acceptor),querylength, | |
7867 | new->chrnum,new->chroffset,new->chrhigh,new->chrlength, | |
7868 | new->plusp,new->genestrand, | |
7869 | ambcoords_donor,amb_knowni_donor,amb_nmismatches_donor,amb_probs_donor, | |
7870 | /*amb_common_prob*/acceptor_prob,/*amb_donor_common_p*/false, | |
7871 | /*substring1p*/true); | |
7875 | donor = Substring_new_ambig_D(/*querystart*/0,/*queryend*/Substring_querystart(acceptor), | |
7876 | /*splice_pos*/Substring_querystart(acceptor),querylength, | |
7877 | new->chrnum,new->chroffset,new->chrhigh,new->chrlength, | |
7878 | new->plusp,new->genestrand, | |
7879 | ambcoords_donor,amb_knowni_donor,amb_nmismatches_donor,amb_probs_donor, | |
7880 | /*amb_common_prob*/acceptor_prob,/*substring1p*/true); | |
7872 | 7881 | debug0(printf("Making sense ambiguous donor at %d..%d with %d matches\n", |
7873 | 7882 | 0,Substring_querystart(acceptor),Substring_nmatches(donor))); |
7874 | 7883 | donor_prob = Doublelist_max(amb_probs_donor); |
7877 | 7886 | new->genomicstart = Substring_genomicstart(donor); |
7878 | 7887 | new->genomicend = Substring_genomicend(donor); |
7879 | 7888 | |
7880 | acceptor = Substring_new_ambig(/*querystart*/Substring_queryend(donor),/*queryend*/querylength, | |
7881 | /*splice_pos*/Substring_queryend(donor),querylength, | |
7882 | new->chrnum,new->chroffset,new->chrhigh,new->chrlength, | |
7883 | new->plusp,new->genestrand, | |
7884 | ambcoords_acceptor,amb_knowni_acceptor,amb_nmismatches_acceptor,amb_probs_acceptor, | |
7885 | /*amb_common_prob*/donor_prob,/*amb_donor_common_p*/true, | |
7886 | /*substring1p*/false); | |
7889 | acceptor = Substring_new_ambig_A(/*querystart*/Substring_queryend(donor),/*queryend*/querylength, | |
7890 | /*splice_pos*/Substring_queryend(donor),querylength, | |
7891 | new->chrnum,new->chroffset,new->chrhigh,new->chrlength, | |
7892 | new->plusp,new->genestrand, | |
7893 | ambcoords_acceptor,amb_knowni_acceptor,amb_nmismatches_acceptor,amb_probs_acceptor, | |
7894 | /*amb_common_prob*/donor_prob,/*substring1p*/false); | |
7887 | 7895 | debug0(printf("Making sense ambiguous donor at %d..%d with %d matches\n", |
7888 | 7896 | Substring_queryend(donor),querylength,Substring_nmatches(acceptor))); |
7889 | 7897 | acceptor_prob = Doublelist_max(amb_probs_acceptor); |
7899 | 7907 | new->genomicstart = Substring_genomicstart(acceptor); |
7900 | 7908 | new->genomicend = Substring_genomicend(acceptor); |
7901 | 7909 | |
7902 | donor = Substring_new_ambig(/*querystart*/Substring_queryend(acceptor),/*queryend*/querylength, | |
7903 | /*splice_pos*/Substring_queryend(acceptor),querylength, | |
7904 | new->chrnum,new->chroffset,new->chrhigh,new->chrlength, | |
7905 | new->plusp,new->genestrand, | |
7906 | ambcoords_donor,amb_knowni_donor,amb_nmismatches_donor,amb_probs_donor, | |
7907 | /*amb_common_prob*/acceptor_prob,/*amb_donor_common_p*/false, | |
7908 | /*substring1p*/false); | |
7910 | donor = Substring_new_ambig_D(/*querystart*/Substring_queryend(acceptor),/*queryend*/querylength, | |
7911 | /*splice_pos*/Substring_queryend(acceptor),querylength, | |
7912 | new->chrnum,new->chroffset,new->chrhigh,new->chrlength, | |
7913 | new->plusp,new->genestrand, | |
7914 | ambcoords_donor,amb_knowni_donor,amb_nmismatches_donor,amb_probs_donor, | |
7915 | /*amb_common_prob*/acceptor_prob,/*substring1p*/false); | |
7909 | 7916 | debug0(printf("Making antisense ambiguous donor at %d..%d with %d matches\n", |
7910 | 7917 | Substring_queryend(acceptor),querylength,Substring_nmatches(donor))); |
7911 | 7918 | donor_prob = Doublelist_max(amb_probs_donor); |
7914 | 7921 | new->genomicstart = Substring_genomicstart(donor); |
7915 | 7922 | new->genomicend = Substring_genomicend(donor); |
7916 | 7923 | |
7917 | acceptor = Substring_new_ambig(/*querystart*/0,/*queryend*/Substring_querystart(donor), | |
7918 | /*splice_pos*/Substring_querystart(donor),querylength, | |
7919 | new->chrnum,new->chroffset,new->chrhigh,new->chrlength, | |
7920 | new->plusp,new->genestrand, | |
7921 | ambcoords_acceptor,amb_knowni_acceptor,amb_nmismatches_acceptor,amb_probs_acceptor, | |
7922 | /*amb_common_prob*/donor_prob,/*amb_donor_common_p*/true, | |
7923 | /*substring1p*/true); | |
7924 | acceptor = Substring_new_ambig_A(/*querystart*/0,/*queryend*/Substring_querystart(donor), | |
7925 | /*splice_pos*/Substring_querystart(donor),querylength, | |
7926 | new->chrnum,new->chroffset,new->chrhigh,new->chrlength, | |
7927 | new->plusp,new->genestrand, | |
7928 | ambcoords_acceptor,amb_knowni_acceptor,amb_nmismatches_acceptor,amb_probs_acceptor, | |
7929 | /*amb_common_prob*/donor_prob,/*substring1p*/true); | |
7924 | 7930 | debug0(printf("Making antisense ambiguous acceptor at %d..%d with %d matches\n", |
7925 | 7931 | 0,Substring_querystart(donor),Substring_nmatches(acceptor))); |
7926 | 7932 | acceptor_prob = Doublelist_max(amb_probs_acceptor); |
7989 | 7995 | new->substrings_1toN = List_copy(new->substrings_LtoH); |
7990 | 7996 | new->substrings_Nto1 = List_reverse(List_copy(new->substrings_LtoH)); |
7991 | 7997 | assert(Substring_querystart(List_head(new->substrings_1toN)) < Substring_querystart(List_head(new->substrings_Nto1))); |
7992 | ||
7993 | 7998 | |
7994 | 7999 | if (first_read_p == true) { |
7995 | 8000 | substring_for_concordance = (Substring_T) List_head(new->substrings_Nto1); |
8153 | 8158 | debug0(printf("Returning new splice %p at genomic %u..%u, donor %p (%u => %u), acceptor %p (%u => %u), score %d\n", |
8154 | 8159 | new,new->genomicstart - new->chroffset,new->genomicend - new->chroffset,donor, |
8155 | 8160 | donor == NULL ? 0 : Substring_left_genomicseg(donor), |
8156 | donor == NULL ? 0 : Substring_splicecoord(donor), | |
8161 | donor == NULL ? 0 : Substring_splicecoord_D(donor), | |
8157 | 8162 | acceptor,acceptor == NULL ? 0 : Substring_left_genomicseg(acceptor), |
8158 | acceptor == NULL ? 0 : Substring_splicecoord(acceptor),new->score)); | |
8163 | acceptor == NULL ? 0 : Substring_splicecoord_A(acceptor),new->score)); | |
8159 | 8164 | debug0(printf("sensedir %d\n",new->sensedir)); |
8160 | 8165 | return new; |
8161 | 8166 | } |
8237 | 8242 | /* Compute distances */ |
8238 | 8243 | if (donor == NULL) { |
8239 | 8244 | new->shortexonA_distance = 0; |
8240 | } else if (Substring_splicecoord_A(shortexon) > Substring_splicecoord(donor)) { | |
8241 | new->shortexonA_distance = Substring_splicecoord_A(shortexon) - Substring_splicecoord(donor); | |
8242 | } else { | |
8243 | new->shortexonA_distance = Substring_splicecoord(donor) - Substring_splicecoord_A(shortexon); | |
8245 | } else if (Substring_splicecoord_A(shortexon) > Substring_splicecoord_D(donor)) { | |
8246 | new->shortexonA_distance = Substring_splicecoord_A(shortexon) - Substring_splicecoord_D(donor); | |
8247 | } else { | |
8248 | new->shortexonA_distance = Substring_splicecoord_D(donor) - Substring_splicecoord_A(shortexon); | |
8244 | 8249 | } |
8245 | 8250 | |
8246 | 8251 | if (acceptor == NULL) { |
8247 | 8252 | new->shortexonD_distance = 0; |
8248 | } else if (Substring_splicecoord_D(shortexon) > Substring_splicecoord(acceptor)) { | |
8249 | new->shortexonD_distance = Substring_splicecoord_D(shortexon) - Substring_splicecoord(acceptor); | |
8250 | } else { | |
8251 | new->shortexonD_distance = Substring_splicecoord(acceptor) - Substring_splicecoord_D(shortexon); | |
8253 | } else if (Substring_splicecoord_D(shortexon) > Substring_splicecoord_A(acceptor)) { | |
8254 | new->shortexonD_distance = Substring_splicecoord_D(shortexon) - Substring_splicecoord_A(acceptor); | |
8255 | } else { | |
8256 | new->shortexonD_distance = Substring_splicecoord_A(acceptor) - Substring_splicecoord_D(shortexon); | |
8252 | 8257 | } |
8253 | 8258 | new->distance = new->shortexonA_distance + new->shortexonD_distance; |
8254 | 8259 | |
8268 | 8273 | if (sensedir == SENSE_FORWARD) { |
8269 | 8274 | substring0 = copy_donor_p ? Substring_copy(donor) : donor; |
8270 | 8275 | if (donor == NULL) { |
8271 | donor = substring0 = Substring_new_ambig(/*querystart*/0,/*queryend*/Substring_querystart(shortexon), | |
8272 | /*splice_pos*/Substring_querystart(shortexon),querylength, | |
8273 | new->chrnum,new->chroffset,new->chrhigh,new->chrlength, | |
8274 | new->plusp,new->genestrand, | |
8275 | ambcoords_donor,amb_knowni_donor,amb_nmismatches_donor,amb_probs_donor, | |
8276 | /*amb_common_prob*/acceptor_prob,/*amb_donor_common_p*/false, | |
8277 | /*substring1p*/true); | |
8276 | donor = substring0 = Substring_new_ambig_D(/*querystart*/0,/*queryend*/Substring_querystart(shortexon), | |
8277 | /*splice_pos*/Substring_querystart(shortexon),querylength, | |
8278 | new->chrnum,new->chroffset,new->chrhigh,new->chrlength, | |
8279 | new->plusp,new->genestrand, | |
8280 | ambcoords_donor,amb_knowni_donor,amb_nmismatches_donor,amb_probs_donor, | |
8281 | /*amb_common_prob*/acceptor_prob,/*substring1p*/true); | |
8278 | 8282 | /* new->start_amb_prob = Doublelist_max(amb_probs_donor); */ |
8279 | 8283 | /* new->start_amb_length = amb_length_donor; */ |
8280 | 8284 | junction0 = Junction_new_splice(/*distance*/0,sensedir,Doublelist_max(amb_probs_donor),shortexonA_prob); |
8281 | } else if (Substring_splicecoord_A(shortexon) > Substring_splicecoord(donor)) { | |
8282 | distance = Substring_splicecoord_A(shortexon) - Substring_splicecoord(donor); | |
8285 | } else if (Substring_splicecoord_A(shortexon) > Substring_splicecoord_D(donor)) { | |
8286 | distance = Substring_splicecoord_A(shortexon) - Substring_splicecoord_D(donor); | |
8283 | 8287 | junction0 = Junction_new_splice(distance,sensedir,donor_prob,shortexonA_prob); |
8284 | 8288 | } else { |
8285 | distance = Substring_splicecoord(donor) - Substring_splicecoord_A(shortexon); | |
8289 | distance = Substring_splicecoord_D(donor) - Substring_splicecoord_A(shortexon); | |
8286 | 8290 | junction0 = Junction_new_splice(distance,sensedir,donor_prob,shortexonA_prob); |
8287 | 8291 | } |
8288 | 8292 | |
8289 | 8293 | substring2 = copy_acceptor_p ? Substring_copy(acceptor) : acceptor; |
8290 | 8294 | if (acceptor == NULL) { |
8291 | acceptor = substring2 = Substring_new_ambig(/*querystart*/Substring_queryend(shortexon),/*queryend*/querylength, | |
8292 | /*splice_pos*/Substring_queryend(shortexon),querylength, | |
8293 | new->chrnum,new->chroffset,new->chrhigh,new->chrlength, | |
8294 | new->plusp,new->genestrand, | |
8295 | ambcoords_acceptor,amb_knowni_acceptor,amb_nmismatches_acceptor,amb_probs_acceptor, | |
8296 | /*amb_common_prob*/donor_prob,/*amb_donor_common_p*/true, | |
8297 | /*substring1p*/false); | |
8295 | acceptor = substring2 = Substring_new_ambig_A(/*querystart*/Substring_queryend(shortexon),/*queryend*/querylength, | |
8296 | /*splice_pos*/Substring_queryend(shortexon),querylength, | |
8297 | new->chrnum,new->chroffset,new->chrhigh,new->chrlength, | |
8298 | new->plusp,new->genestrand, | |
8299 | ambcoords_acceptor,amb_knowni_acceptor,amb_nmismatches_acceptor,amb_probs_acceptor, | |
8300 | /*amb_common_prob*/donor_prob,/*substring1p*/false); | |
8298 | 8301 | /* new->end_amb_prob = Doublelist_max(amb_probs_acceptor); */ |
8299 | 8302 | /* new->end_amb_length = amb_length_acceptor; */ |
8300 | 8303 | junction2 = Junction_new_splice(/*distance*/0,sensedir,shortexonD_prob,Doublelist_max(amb_probs_acceptor)); |
8301 | } else if (Substring_splicecoord_D(shortexon) > Substring_splicecoord(acceptor)) { | |
8302 | distance = Substring_splicecoord_D(shortexon) - Substring_splicecoord(acceptor); | |
8304 | } else if (Substring_splicecoord_D(shortexon) > Substring_splicecoord_A(acceptor)) { | |
8305 | distance = Substring_splicecoord_D(shortexon) - Substring_splicecoord_A(acceptor); | |
8303 | 8306 | junction2 = Junction_new_splice(distance,sensedir,shortexonD_prob,acceptor_prob); |
8304 | 8307 | } else { |
8305 | distance = Substring_splicecoord(acceptor) - Substring_splicecoord_D(shortexon); | |
8308 | distance = Substring_splicecoord_A(acceptor) - Substring_splicecoord_D(shortexon); | |
8306 | 8309 | junction2 = Junction_new_splice(distance,sensedir,shortexonD_prob,acceptor_prob); |
8307 | 8310 | } |
8308 | 8311 | |
8309 | 8312 | } else if (sensedir == SENSE_ANTI) { |
8310 | 8313 | substring0 = copy_acceptor_p ? Substring_copy(acceptor) : acceptor; |
8311 | 8314 | if (acceptor == NULL) { |
8312 | acceptor = substring0 = Substring_new_ambig(/*querystart*/0,/*queryend*/Substring_querystart(shortexon), | |
8313 | /*splice_pos*/Substring_querystart(shortexon),querylength, | |
8314 | new->chrnum,new->chroffset,new->chrhigh,new->chrlength, | |
8315 | new->plusp,new->genestrand, | |
8316 | ambcoords_acceptor,amb_knowni_acceptor,amb_nmismatches_acceptor,amb_probs_acceptor, | |
8317 | /*amb_common_prob*/donor_prob,/*amb_donor_common_p*/true, | |
8318 | /*substring1p*/true); | |
8315 | acceptor = substring0 = Substring_new_ambig_A(/*querystart*/0,/*queryend*/Substring_querystart(shortexon), | |
8316 | /*splice_pos*/Substring_querystart(shortexon),querylength, | |
8317 | new->chrnum,new->chroffset,new->chrhigh,new->chrlength, | |
8318 | new->plusp,new->genestrand, | |
8319 | ambcoords_acceptor,amb_knowni_acceptor,amb_nmismatches_acceptor,amb_probs_acceptor, | |
8320 | /*amb_common_prob*/donor_prob,/*substring1p*/true); | |
8319 | 8321 | /* new->start_amb_prob = Doublelist_max(amb_probs_acceptor); */ |
8320 | 8322 | /* new->start_amb_length = amb_length_acceptor; */ |
8321 | 8323 | junction0 = Junction_new_splice(/*distance*/0,sensedir,shortexonD_prob,Doublelist_max(amb_probs_acceptor)); |
8322 | } else if (Substring_splicecoord_D(shortexon) > Substring_splicecoord(acceptor)) { | |
8323 | distance = Substring_splicecoord_D(shortexon) - Substring_splicecoord(acceptor); | |
8324 | } else if (Substring_splicecoord_D(shortexon) > Substring_splicecoord_A(acceptor)) { | |
8325 | distance = Substring_splicecoord_D(shortexon) - Substring_splicecoord_A(acceptor); | |
8324 | 8326 | junction0 = Junction_new_splice(distance,sensedir,shortexonD_prob,acceptor_prob); |
8325 | 8327 | } else { |
8326 | distance = Substring_splicecoord(acceptor) - Substring_splicecoord_D(shortexon); | |
8328 | distance = Substring_splicecoord_A(acceptor) - Substring_splicecoord_D(shortexon); | |
8327 | 8329 | junction0 = Junction_new_splice(distance,sensedir,shortexonD_prob,acceptor_prob); |
8328 | 8330 | } |
8329 | 8331 | |
8330 | 8332 | substring2 = copy_donor_p ? Substring_copy(donor) : donor; |
8331 | 8333 | if (donor == NULL) { |
8332 | donor = substring2 = Substring_new_ambig(/*querystart*/Substring_queryend(shortexon),/*queryend*/querylength, | |
8333 | /*splice_pos*/Substring_queryend(shortexon),querylength, | |
8334 | new->chrnum,new->chroffset,new->chrhigh,new->chrlength, | |
8335 | new->plusp,new->genestrand, | |
8336 | ambcoords_donor,amb_knowni_donor,amb_nmismatches_donor,amb_probs_donor, | |
8337 | /*amb_common_prob*/acceptor_prob,/*amb_donor_common_p*/false, | |
8338 | /*substring1p*/false); | |
8334 | donor = substring2 = Substring_new_ambig_D(/*querystart*/Substring_queryend(shortexon),/*queryend*/querylength, | |
8335 | /*splice_pos*/Substring_queryend(shortexon),querylength, | |
8336 | new->chrnum,new->chroffset,new->chrhigh,new->chrlength, | |
8337 | new->plusp,new->genestrand, | |
8338 | ambcoords_donor,amb_knowni_donor,amb_nmismatches_donor,amb_probs_donor, | |
8339 | /*amb_common_prob*/acceptor_prob,/*substring1p*/false); | |
8339 | 8340 | /* new->end_amb_prob = Doublelist_max(amb_probs_donor); */ |
8340 | 8341 | /* new->end_amb_length = amb_length_donor; */ |
8341 | 8342 | junction2 = Junction_new_splice(/*distance*/0,sensedir,Doublelist_max(amb_probs_donor),shortexonA_prob); |
8342 | } else if (Substring_splicecoord_A(shortexon) > Substring_splicecoord(donor)) { | |
8343 | distance = Substring_splicecoord_A(shortexon) - Substring_splicecoord(donor); | |
8343 | } else if (Substring_splicecoord_A(shortexon) > Substring_splicecoord_D(donor)) { | |
8344 | distance = Substring_splicecoord_A(shortexon) - Substring_splicecoord_D(donor); | |
8344 | 8345 | junction2 = Junction_new_splice(distance,sensedir,donor_prob,shortexonA_prob); |
8345 | 8346 | } else { |
8346 | distance = Substring_splicecoord(donor) - Substring_splicecoord_A(shortexon); | |
8347 | distance = Substring_splicecoord_D(donor) - Substring_splicecoord_A(shortexon); | |
8347 | 8348 | junction2 = Junction_new_splice(distance,sensedir,donor_prob,shortexonA_prob); |
8348 | 8349 | } |
8349 | 8350 | |
10153 | 10154 | if (hit->hittype == TERMINAL) { |
10154 | 10155 | /* Don't allow terminals to set trims */ |
10155 | 10156 | |
10157 | } else if (hit->hittype == INSERTION || hit->hittype == DELETION) { | |
10158 | /* Don't allow indels to set trims, since they artificially align at the end */ | |
10159 | ||
10156 | 10160 | #if 0 |
10157 | 10161 | } else if ((hit->hittype == INSERTION || hit->hittype == DELETION) && |
10158 | 10162 | (hit->indel_pos < 15 || hit->indel_pos > hit->querylength - 15)) { |
11389 | 11393 | } |
11390 | 11394 | #endif |
11391 | 11395 | |
11392 | /* Favors definitive splices over ambiguous splices. So need to | |
11393 | make sure we don't make definitive splices unnecessarily */ | |
11396 | /* Favors ambiguous splices over definitive splices */ | |
11394 | 11397 | if (hit->nsegments > best_hit->nsegments) { |
11395 | 11398 | if (hit->nmatches_posttrim > best_hit->nmatches_posttrim) { |
11396 | 11399 | /* More segments and strictly more matches */ |
11432 | 11435 | debug7(printf(" => %d wins by hittype\n",k)); |
11433 | 11436 | return +1; |
11434 | 11437 | |
11435 | #if 0 | |
11436 | } else if (hit->start_amb_length + hit->end_amb_length == 0 && | |
11437 | best_hit->start_amb_length + best_hit->end_amb_length > 0) { | |
11438 | } else if (start_amb_length(hit) + end_amb_length(hit) > 0 && | |
11439 | start_amb_length(best_hit) + end_amb_length(best_hit) == 0) { | |
11438 | 11440 | debug7(printf(" => %d loses by ambiguity\n",k)); |
11439 | 11441 | return -1; |
11440 | } else if (hit->start_amb_length + hit->end_amb_length > 0 && | |
11441 | best_hit->start_amb_length + best_hit->end_amb_length == 0) { | |
11442 | } else if (start_amb_length(hit) + end_amb_length(hit) == 0 && | |
11443 | start_amb_length(best_hit) + end_amb_length(best_hit) > 0) { | |
11442 | 11444 | debug7(printf(" => %d wins by ambiguity\n",k)); |
11443 | 11445 | return +1; |
11444 | #endif | |
11445 | 11446 | |
11446 | 11447 | } else if (hit->nindels > best_hit->nindels) { |
11447 | 11448 | debug7(printf(" => %d loses by nindels\n",k)); |
15506 | 15507 | #endif |
15507 | 15508 | |
15508 | 15509 | |
15509 | /* Favors definitive splices over ambiguous splices. So need to | |
15510 | make sure we don't make definitive splices unnecessarily */ | |
15511 | ||
15510 | /* Favors ambiguous splices over definitive splices */ | |
15512 | 15511 | if (hitpair->hit5->nsegments + hitpair->hit3->nsegments > best_hitpair->hit5->nsegments + best_hitpair->hit3->nsegments) { |
15513 | 15512 | if (hitpair->nmatches_posttrim > best_hitpair->nmatches_posttrim) { |
15514 | 15513 | /* More segments and strictly more matches */ |
15597 | 15596 | return +1; |
15598 | 15597 | #endif |
15599 | 15598 | |
15600 | #if 0 | |
15601 | } else if (hitpair->hit5->start_amb_length + hitpair->hit5->end_amb_length + | |
15602 | hitpair->hit3->start_amb_length + hitpair->hit3->end_amb_length > 0 && | |
15603 | best_hitpair->hit5->start_amb_length + best_hitpair->hit5->end_amb_length + | |
15604 | best_hitpair->hit3->start_amb_length + best_hitpair->hit3->end_amb_length == 0) { | |
15599 | } else if (start_amb_length(hitpair->hit5) + end_amb_length(hitpair->hit5) + | |
15600 | start_amb_length(hitpair->hit3) + end_amb_length(hitpair->hit3) == 0 && | |
15601 | start_amb_length(best_hitpair->hit5) + end_amb_length(best_hitpair->hit5) + | |
15602 | start_amb_length(best_hitpair->hit3) + end_amb_length(best_hitpair->hit3) > 0) { | |
15605 | 15603 | debug8(printf(" => loses by ambiguity\n")); |
15606 | 15604 | return -1; |
15607 | 15605 | |
15608 | } else if (hitpair->hit5->start_amb_length + hitpair->hit5->end_amb_length + | |
15609 | hitpair->hit3->start_amb_length + hitpair->hit3->end_amb_length == 0 && | |
15610 | best_hitpair->hit5->start_amb_length + best_hitpair->hit5->end_amb_length + | |
15611 | best_hitpair->hit3->start_amb_length + best_hitpair->hit3->end_amb_length > 0) { | |
15606 | } else if (start_amb_length(hitpair->hit5) + end_amb_length(hitpair->hit5) + | |
15607 | start_amb_length(hitpair->hit3) + end_amb_length(hitpair->hit3) > 0 && | |
15608 | start_amb_length(best_hitpair->hit5) + end_amb_length(best_hitpair->hit5) + | |
15609 | start_amb_length(best_hitpair->hit3) + end_amb_length(best_hitpair->hit3) == 0) { | |
15612 | 15610 | debug8(printf(" => wins by ambiguity\n")); |
15613 | 15611 | return +1; |
15614 | #endif | |
15615 | 15612 | |
15616 | 15613 | #if 0 |
15617 | 15614 | } else if (hitpair->absdifflength < best_hitpair->absdifflength) { |
16531 | 16528 | if (hit5->hittype == TERMINAL) { |
16532 | 16529 | /* Don't allow terminals to set trims */ |
16533 | 16530 | |
16531 | } else if (hit5->hittype == INSERTION || hit5->hittype == DELETION) { | |
16532 | /* Don't allow indels to set trims, since they artificially align at the end */ | |
16533 | ||
16534 | 16534 | #if 0 |
16535 | 16535 | } else if ((hit5->hittype == INSERTION || hit5->hittype == DELETION) && |
16536 | 16536 | (hit5->indel_pos < 15 || hit5->indel_pos > hit5->querylength - 15)) { |
16552 | 16552 | |
16553 | 16553 | if (hit3->hittype == TERMINAL) { |
16554 | 16554 | /* Don't allow terminals to set trims */ |
16555 | ||
16556 | } else if (hit3->hittype == INSERTION || hit3->hittype == DELETION) { | |
16557 | /* Don't allow indels to set trims, since they artificially align at the end */ | |
16555 | 16558 | |
16556 | 16559 | #if 0 |
16557 | 16560 | } else if ((hit3->hittype == INSERTION || hit3->hittype == DELETION) && |
17357 | 17360 | (*nconcordant)++; |
17358 | 17361 | } |
17359 | 17362 | |
17360 | if (0 && *nconcordant > maxpairedpaths) { | |
17363 | if (*nconcordant > maxpairedpaths) { | |
17361 | 17364 | debug(printf(" -- %d concordant paths exceeds %d",*nconcordant,maxpairedpaths)); |
17362 | 17365 | *abort_pairing_p = true; |
17363 | 17366 | } |
17477 | 17480 | (*nconcordant)++; |
17478 | 17481 | } |
17479 | 17482 | |
17480 | if (0 && *nconcordant > maxpairedpaths) { | |
17483 | if (*nconcordant > maxpairedpaths) { | |
17481 | 17484 | debug(printf(" -- %d concordant paths exceeds %d",*nconcordant,maxpairedpaths)); |
17482 | 17485 | *abort_pairing_p = true; |
17483 | 17486 | } |
0 | /* $Id: stage3hr.h 195760 2016-08-04 00:12:04Z twu $ */ | |
0 | /* $Id: stage3hr.h 196273 2016-08-12 15:15:06Z twu $ */ | |
1 | 1 | #ifndef STAGE3HR_INCLUDED |
2 | 2 | #define STAGE3HR_INCLUDED |
3 | 3 |
0 | static char rcsid[] = "$Id: substring.c 195961 2016-08-08 16:36:34Z twu $"; | |
0 | static char rcsid[] = "$Id: substring.c 196404 2016-08-16 14:47:49Z twu $"; | |
1 | 1 | #ifdef HAVE_CONFIG_H |
2 | 2 | #include <config.h> |
3 | 3 | #endif |
30 | 30 | #define SCRAMBLE_TEXT "scramble" |
31 | 31 | |
32 | 32 | #define END_SPLICESITE_SEARCH 10 |
33 | #define MIN_EXON_LENGTH 9 | |
33 | 34 | #define END_SPLICESITE_PROB_MATCH 0.90 |
34 | 35 | #define END_SPLICESITE_PROB_MISMATCH 0.95 |
35 | 36 | |
190 | 191 | return ""; |
191 | 192 | } |
192 | 193 | |
194 | char * | |
195 | Trimaction_string (Trimaction_T trimaction) { | |
196 | switch (trimaction) { | |
197 | case NO_TRIM: return "NO_TRIM"; | |
198 | case PRE_TRIMMED: return "PRE_TRIMMED"; | |
199 | case COMPUTE_TRIM: return "COMPUTE_TRIM"; | |
200 | default: | |
201 | fprintf(stderr,"Unexpected trimaction %d\n",trimaction); | |
202 | abort(); | |
203 | } | |
204 | return ""; | |
205 | } | |
206 | ||
207 | ||
193 | 208 | |
194 | 209 | static char complCode[128] = COMPLEMENT_LC; |
195 | 210 | |
296 | 311 | /* for splices */ |
297 | 312 | int chimera_sensedir; |
298 | 313 | |
299 | Univcoord_T splicecoord; | |
300 | int splicesites_knowni; /* Needed for intragenic_splice_p in stage1hr.c */ | |
301 | ||
302 | bool chimera_knownp; /* Used for computing Substring_nchimera_known */ | |
303 | bool chimera_novelp; | |
304 | Univcoord_T chimera_modelpos; | |
305 | int chimera_pos; | |
306 | double chimera_prob; | |
314 | Univcoord_T splicecoord_D; | |
315 | int splicesitesD_knowni; /* Needed for intragenic_splice_p in stage1hr.c */ | |
316 | ||
317 | bool siteD_knownp; /* Used for computing Substring_nchimera_known */ | |
318 | bool siteD_novelp; | |
319 | int siteD_pos; | |
320 | double siteD_prob; | |
307 | 321 | |
308 | 322 | /* for shortexon (always use *_1 for acceptor and *_2 for donor) */ |
309 | 323 | /* for donor/acceptor: the ambiguous position */ |
310 | Univcoord_T splicecoord_2; | |
311 | int splicesites_knowni_2; | |
312 | ||
313 | bool chimera_knownp_2; | |
314 | bool chimera_novelp_2; | |
315 | Univcoord_T chimera_modelpos_2; | |
316 | int chimera_pos_2; | |
317 | double chimera_prob_2; | |
318 | ||
324 | Univcoord_T splicecoord_A; | |
325 | int splicesitesA_knowni; | |
326 | ||
327 | bool siteA_knownp; | |
328 | bool siteA_novelp; | |
329 | int siteA_pos; | |
319 | 330 | double siteA_prob; |
320 | double siteD_prob; | |
331 | ||
332 | Univcoord_T splicecoord_N; /* For DNA fusions */ | |
333 | int siteN_pos; | |
334 | ||
321 | 335 | |
322 | 336 | bool ambiguous_p; |
323 | 337 | int nambcoords; |
325 | 339 | int *amb_knowni; |
326 | 340 | int *amb_nmismatches; |
327 | 341 | double *amb_probs; |
328 | double amb_common_prob; | |
329 | bool amb_donor_common_p; | |
342 | Endtype_T amb_type; /* Ambiguous DONs or ACCs */ | |
330 | 343 | }; |
331 | 344 | |
332 | 345 | |
354 | 367 | this->alignend += chrlength; |
355 | 368 | this->alignstart_trim += chrlength; |
356 | 369 | this->alignend_trim += chrlength; |
357 | this->chimera_modelpos += chrlength; | |
358 | this->chimera_modelpos_2 += chrlength; | |
370 | this->splicecoord_D += chrlength; | |
371 | this->splicecoord_A += chrlength; | |
372 | this->splicecoord_N += chrlength; | |
359 | 373 | } |
360 | 374 | |
361 | 375 | return; |
386 | 400 | this->alignend -= chrlength; |
387 | 401 | this->alignstart_trim -= chrlength; |
388 | 402 | this->alignend_trim -= chrlength; |
389 | this->chimera_modelpos -= chrlength; | |
390 | this->chimera_modelpos_2 -= chrlength; | |
403 | this->splicecoord_D -= chrlength; | |
404 | this->splicecoord_A -= chrlength; | |
405 | this->splicecoord_N -= chrlength; | |
391 | 406 | } |
392 | 407 | |
393 | 408 | return; |
628 | 643 | int trim5, alignlength, pos, prevpos, i; |
629 | 644 | int nmismatches; |
630 | 645 | |
631 | #ifdef HAVE_ALLOCA | |
646 | #if defined(LONG_READLENGTHS) | |
647 | int *mismatch_positions = (int *) MALLOC(querylength*sizeof(int)); | |
648 | #elif defined(HAVE_ALLOCA) | |
632 | 649 | int *mismatch_positions = (int *) ALLOCA(querylength*sizeof(int)); |
633 | 650 | #else |
634 | 651 | int mismatch_positions[MAX_READLENGTH]; |
734 | 751 | } |
735 | 752 | } |
736 | 753 | |
754 | #if defined(LONG_READLENGTHS) | |
755 | FREE(mismatch_positions); | |
756 | #elif defined(HAVE_ALLOCA) | |
757 | FREEA(mismatch_positions); | |
758 | #else | |
759 | /* Hard-coded use of MAX_READLENGTH */ | |
760 | #endif | |
737 | 761 | |
738 | 762 | debug8(printf("Trim left pos 0, score %d, trim5 %d, nmismatches_end %d\n",score,trim5,*nmismatches_end)); |
739 | 763 | debug8(printf("\n")); |
752 | 776 | int trim3, alignlength, pos, prevpos, i; |
753 | 777 | int nmismatches; |
754 | 778 | |
755 | #ifdef HAVE_ALLOCA | |
779 | #if defined(LONG_READLENGTHS) | |
780 | int *mismatch_positions = (int *) MALLOC(querylength*sizeof(int)); | |
781 | #elif defined(HAVE_ALLOCA) | |
756 | 782 | int *mismatch_positions = (int *) ALLOCA(querylength*sizeof(int)); |
757 | 783 | #else |
758 | 784 | int mismatch_positions[MAX_READLENGTH]; |
854 | 880 | } |
855 | 881 | } |
856 | 882 | |
883 | #if defined(LONG_READLENGTHS) | |
884 | FREE(mismatch_positions); | |
885 | #elif defined(HAVE_ALLOCA) | |
886 | FREEA(mismatch_positions); | |
887 | #else | |
888 | /* Hard-coded use of MAX_READLENGTH */ | |
889 | #endif | |
857 | 890 | |
858 | 891 | debug8(printf("Trim right pos %d, score %d, trim3 %d, nmismatches_end %d\n",queryend-1,score,trim3,*nmismatches_end)); |
859 | 892 | debug8(printf("\n")); |
1787 | 1820 | |
1788 | 1821 | |
1789 | 1822 | /* Modified from trim_novel_spliceends in stage3.c */ |
1790 | void | |
1823 | /* Note: If substring does not extend to ends of query, then region | |
1824 | beyond querystart and queryend might actually be matching, and not | |
1825 | mismatches. Could fix in the future. */ | |
1826 | static void | |
1791 | 1827 | substring_trim_novel_spliceends (T substring1, T substringN, int *ambig_end_length_5, int *ambig_end_length_3, |
1792 | 1828 | Splicetype_T *ambig_splicetype_5, Splicetype_T *ambig_splicetype_3, |
1793 | 1829 | double *ambig_prob_5, double *ambig_prob_3, int *sensedir, |
1807 | 1843 | int splice_sensedir_5, splice_sensedir_3, splice_sensedir_5_mm, splice_sensedir_3_mm; |
1808 | 1844 | |
1809 | 1845 | |
1810 | debug13(printf("\nEntered Substring_trim_novel_spliceends with sensedir %d\n",*sensedir)); | |
1846 | debug13(printf("\nEntered substring_trim_novel_spliceends with sensedir %d\n",*sensedir)); | |
1811 | 1847 | *ambig_end_length_5 = 0; |
1812 | 1848 | *ambig_end_length_3 = 0; |
1849 | *ambig_prob_5 = 0.0; | |
1850 | *ambig_prob_3 = 0.0; | |
1813 | 1851 | |
1814 | 1852 | /* start is distal, end is medial */ |
1815 | 1853 | if (substringN == NULL) { |
1816 | 1854 | /* Skip 3' end*/ |
1817 | 1855 | } else if (substringN->plusp == true) { |
1818 | start = substringN->genomicend; | |
1819 | 1856 | middle = substringN->alignend_trim + 1; |
1820 | if ((end = middle - END_SPLICESITE_SEARCH) < substringN->alignstart_trim) { | |
1821 | end = substringN->alignstart_trim; | |
1822 | } | |
1823 | } else { | |
1824 | start = substringN->genomicend; | |
1857 | if ((start = middle + END_SPLICESITE_SEARCH) > substringN->genomicend) { | |
1858 | start = substringN->genomicend; | |
1859 | } | |
1860 | if ((end = middle - END_SPLICESITE_SEARCH) < substringN->alignstart_trim + MIN_EXON_LENGTH) { | |
1861 | end = substringN->alignstart_trim + MIN_EXON_LENGTH; | |
1862 | } | |
1863 | debug13(printf("\n1 Set end points for 3' trim to be %u..%u..%u\n",start,middle,end)); | |
1864 | ||
1865 | } else { | |
1825 | 1866 | middle = substringN->alignend_trim - 1; |
1826 | if ((end = middle + END_SPLICESITE_SEARCH) > substringN->alignstart_trim) { | |
1827 | end = substringN->alignstart_trim; | |
1828 | } | |
1867 | if ((start = middle - END_SPLICESITE_SEARCH) < substringN->genomicend) { | |
1868 | start = substringN->genomicend; | |
1869 | } | |
1870 | if ((end = middle + END_SPLICESITE_SEARCH) > substringN->alignstart_trim - MIN_EXON_LENGTH) { | |
1871 | end = substringN->alignstart_trim - MIN_EXON_LENGTH; | |
1872 | } | |
1873 | debug13(printf("\n2 Set end points for 3' trim to be %u..%u..%u\n",start,middle,end)); | |
1829 | 1874 | } |
1830 | 1875 | |
1831 | 1876 | if (substringN == NULL) { |
1838 | 1883 | middle_genomicpos = middle; |
1839 | 1884 | end_genomicpos = end; |
1840 | 1885 | |
1841 | assert(start_genomicpos >= end_genomicpos); | |
1886 | /* assert(start_genomicpos >= end_genomicpos); */ | |
1842 | 1887 | genomicpos = start_genomicpos; |
1843 | 1888 | while (genomicpos >= middle_genomicpos) { |
1844 | 1889 | donor_prob = Maxent_hr_donor_prob(genomicpos,chroffset); /* Case 1 */ |
1867 | 1912 | middle_genomicpos = middle; |
1868 | 1913 | end_genomicpos = end; |
1869 | 1914 | |
1870 | assert(start_genomicpos <= end_genomicpos); | |
1915 | /* assert(start_genomicpos <= end_genomicpos); */ | |
1871 | 1916 | genomicpos = start_genomicpos; |
1872 | 1917 | while (genomicpos <= middle_genomicpos) { |
1873 | 1918 | donor_prob = Maxent_hr_antidonor_prob(genomicpos,chroffset); /* Case 3 */ |
1898 | 1943 | middle_genomicpos = middle; |
1899 | 1944 | end_genomicpos = end; |
1900 | 1945 | |
1901 | assert(start_genomicpos >= end_genomicpos); | |
1946 | /* assert(start_genomicpos >= end_genomicpos); */ | |
1902 | 1947 | genomicpos = start_genomicpos; |
1903 | 1948 | while (genomicpos >= middle_genomicpos) { |
1904 | 1949 | acceptor_prob = Maxent_hr_antiacceptor_prob(genomicpos,chroffset); /* Case 5 */ |
1927 | 1972 | middle_genomicpos = middle; |
1928 | 1973 | end_genomicpos = end; |
1929 | 1974 | |
1930 | assert(start_genomicpos <= end_genomicpos); | |
1975 | /* assert(start_genomicpos <= end_genomicpos); */ | |
1931 | 1976 | genomicpos = start_genomicpos; |
1932 | 1977 | while (genomicpos <= middle_genomicpos) { |
1933 | 1978 | acceptor_prob = Maxent_hr_acceptor_prob(genomicpos,chroffset); /* Case 7 */ |
1956 | 2001 | middle_genomicpos = middle; |
1957 | 2002 | end_genomicpos = end; |
1958 | 2003 | |
1959 | assert(start_genomicpos >= end_genomicpos); | |
2004 | /* assert(start_genomicpos >= end_genomicpos); */ | |
1960 | 2005 | genomicpos = start_genomicpos; |
1961 | 2006 | while (genomicpos >= middle_genomicpos) { |
1962 | 2007 | donor_prob = Maxent_hr_donor_prob(genomicpos,chroffset); /* Case 1 */ |
2017 | 2062 | middle_genomicpos = middle; |
2018 | 2063 | end_genomicpos = end; |
2019 | 2064 | |
2020 | assert(start_genomicpos <= end_genomicpos); | |
2065 | /* assert(start_genomicpos <= end_genomicpos); */ | |
2021 | 2066 | genomicpos = start_genomicpos; |
2022 | 2067 | while (genomicpos <= middle_genomicpos) { |
2023 | 2068 | donor_prob = Maxent_hr_antidonor_prob(genomicpos,chroffset); /* Case 3 */ |
2083 | 2128 | Splicetype_string(splicetype3),splice_genomepos_3-chroffset,max_prob_3)); |
2084 | 2129 | if (substringN->plusp) { |
2085 | 2130 | *ambig_end_length_3 = substringN->genomicend - splice_genomepos_3; |
2131 | debug13(printf("Set ambig_end_length_3 to be %d = %u - %u\n",*ambig_end_length_3,substringN->genomicend,splice_genomepos_3)); | |
2086 | 2132 | } else { |
2087 | 2133 | *ambig_end_length_3 = splice_genomepos_3 - substringN->genomicend; |
2134 | debug13(printf("Set ambig_end_length_3 to be %d = %u - %u\n",*ambig_end_length_3,splice_genomepos_3,substringN->genomicend)); | |
2088 | 2135 | } |
2089 | 2136 | *ambig_splicetype_3 = splicetype3; |
2090 | 2137 | *ambig_prob_3 = max_prob_3; |
2091 | debug13(printf("Set ambig_end_length_3 to be %d\n",*ambig_end_length_3)); | |
2092 | 2138 | |
2093 | 2139 | } else if (max_prob_3_mm > END_SPLICESITE_PROB_MISMATCH) { |
2094 | 2140 | debug13(printf("Found good mismatch splice %s on 3' end at %u with probability %f\n", |
2095 | 2141 | Splicetype_string(splicetype3_mm),splice_genomepos_3_mm-chroffset,max_prob_3_mm)); |
2096 | 2142 | if (substringN->plusp) { |
2097 | 2143 | *ambig_end_length_3 = substringN->genomicend - splice_genomepos_3_mm; |
2144 | debug13(printf("Set ambig_end_length_3 to be %d = %u - %u\n",*ambig_end_length_3,substringN->genomicend,splice_genomepos_3_mm)); | |
2098 | 2145 | } else { |
2099 | 2146 | *ambig_end_length_3 = splice_genomepos_3_mm - substringN->genomicend; |
2147 | debug13(printf("Set ambig_end_length_3 to be %d = %u - %u\n",*ambig_end_length_3,splice_genomepos_3_mm,substringN->genomicend)); | |
2100 | 2148 | } |
2101 | 2149 | *ambig_splicetype_3 = splicetype3_mm; |
2102 | 2150 | *ambig_prob_3 = max_prob_3_mm; |
2103 | debug13(printf("Set ambig_end_length_3 to be %d\n",*ambig_end_length_3)); | |
2104 | 2151 | } |
2105 | 2152 | } |
2106 | 2153 | |
2109 | 2156 | if (substring1 == NULL) { |
2110 | 2157 | /* Skip 5' end */ |
2111 | 2158 | } else if (substring1->plusp == true) { |
2112 | start = substring1->genomicstart; | |
2113 | 2159 | middle = substring1->alignstart_trim - 1; |
2114 | if ((end = middle + END_SPLICESITE_SEARCH) > substring1->alignend_trim) { | |
2115 | end = substring1->alignend_trim; | |
2116 | } | |
2117 | } else { | |
2118 | start = substring1->genomicstart; | |
2160 | if ((start = middle - END_SPLICESITE_SEARCH) < substring1->genomicstart) { | |
2161 | start = substring1->genomicstart; | |
2162 | } | |
2163 | if ((end = middle + END_SPLICESITE_SEARCH) > substring1->alignend_trim - MIN_EXON_LENGTH) { | |
2164 | end = substring1->alignend_trim - MIN_EXON_LENGTH; | |
2165 | } | |
2166 | debug13(printf("\n1 Set end points for 5' trim to be %u..%u..%u\n",start,middle,end)); | |
2167 | ||
2168 | } else { | |
2119 | 2169 | middle = substring1->alignstart_trim + 1; |
2120 | if ((end = middle - END_SPLICESITE_SEARCH) < substring1->alignend_trim) { | |
2121 | end = substring1->alignend_trim; | |
2122 | } | |
2170 | if ((start = middle + END_SPLICESITE_SEARCH) > substring1->genomicstart) { | |
2171 | start = substring1->genomicstart; | |
2172 | } | |
2173 | if ((end = middle - END_SPLICESITE_SEARCH) < substring1->alignend_trim + MIN_EXON_LENGTH) { | |
2174 | end = substring1->alignend_trim + MIN_EXON_LENGTH; | |
2175 | } | |
2176 | debug13(printf("\n2 Set end points for 5' trim to be %u..%u..%u\n",start,middle,end)); | |
2123 | 2177 | } |
2124 | 2178 | |
2125 | 2179 | if (substring1 == NULL) { |
2132 | 2186 | middle_genomicpos = middle; |
2133 | 2187 | end_genomicpos = end; |
2134 | 2188 | |
2135 | assert(start_genomicpos <= end_genomicpos); | |
2189 | /* assert(start_genomicpos <= end_genomicpos); */ | |
2136 | 2190 | genomicpos = start_genomicpos; |
2137 | 2191 | while (genomicpos <= middle_genomicpos) { |
2138 | 2192 | acceptor_prob = Maxent_hr_acceptor_prob(genomicpos,chroffset); /* Case 2 */ |
2161 | 2215 | middle_genomicpos = middle; |
2162 | 2216 | end_genomicpos = end; |
2163 | 2217 | |
2164 | assert(start_genomicpos >= end_genomicpos); | |
2218 | /* assert(start_genomicpos >= end_genomicpos); */ | |
2165 | 2219 | genomicpos = start_genomicpos; |
2166 | 2220 | while (genomicpos >= middle_genomicpos) { |
2167 | 2221 | acceptor_prob = Maxent_hr_antiacceptor_prob(genomicpos,chroffset); /* Case 4 */ |
2192 | 2246 | middle_genomicpos = middle; |
2193 | 2247 | end_genomicpos = end; |
2194 | 2248 | |
2195 | assert(start_genomicpos <= end_genomicpos); | |
2249 | /* assert(start_genomicpos <= end_genomicpos); */ | |
2196 | 2250 | genomicpos = start_genomicpos; |
2197 | 2251 | while (genomicpos <= middle_genomicpos) { |
2198 | 2252 | donor_prob = Maxent_hr_antidonor_prob(genomicpos,chroffset); /* Case 6 */ |
2221 | 2275 | middle_genomicpos = middle; |
2222 | 2276 | end_genomicpos = end; |
2223 | 2277 | |
2224 | assert(start_genomicpos >= end_genomicpos); | |
2278 | /* assert(start_genomicpos >= end_genomicpos); */ | |
2225 | 2279 | genomicpos = start_genomicpos; |
2226 | 2280 | while (genomicpos >= middle_genomicpos) { |
2227 | 2281 | donor_prob = Maxent_hr_donor_prob(genomicpos,chroffset); /* Case 8 */ |
2250 | 2304 | middle_genomicpos = middle; |
2251 | 2305 | end_genomicpos = end; |
2252 | 2306 | |
2253 | assert(start_genomicpos <= end_genomicpos); | |
2307 | /* assert(start_genomicpos <= end_genomicpos); */ | |
2254 | 2308 | genomicpos = start_genomicpos; |
2255 | 2309 | while (genomicpos <= middle_genomicpos) { |
2256 | 2310 | acceptor_prob = Maxent_hr_acceptor_prob(genomicpos,chroffset); /* Case 2 */ |
2311 | 2365 | middle_genomicpos = middle; |
2312 | 2366 | end_genomicpos = end; |
2313 | 2367 | |
2314 | assert(start_genomicpos >= end_genomicpos); | |
2368 | /* assert(start_genomicpos >= end_genomicpos); */ | |
2315 | 2369 | genomicpos = start_genomicpos; |
2316 | 2370 | while (genomicpos >= middle_genomicpos) { |
2317 | 2371 | acceptor_prob = Maxent_hr_antiacceptor_prob(genomicpos,chroffset); /* Case 4 */ |
2377 | 2431 | Splicetype_string(splicetype5),splice_genomepos_5-chroffset,max_prob_5)); |
2378 | 2432 | if (substring1->plusp) { |
2379 | 2433 | *ambig_end_length_5 = splice_genomepos_5 - substring1->genomicstart; |
2434 | debug13(printf("1 Set ambig_end_length_5 to be %d = %u - %u\n",*ambig_end_length_5,splice_genomepos_5,substring1->genomicstart)); | |
2380 | 2435 | } else { |
2381 | 2436 | *ambig_end_length_5 = substring1->genomicstart - splice_genomepos_5; |
2437 | debug13(printf("2 Set ambig_end_length_5 to be %d = %u - %u\n",*ambig_end_length_5,substring1->genomicstart,splice_genomepos_5)); | |
2382 | 2438 | } |
2383 | 2439 | *ambig_splicetype_5 = splicetype5; |
2384 | 2440 | *ambig_prob_5 = max_prob_5; |
2385 | debug13(printf("Set ambig_end_length_5 to be %d\n",*ambig_end_length_5)); | |
2386 | 2441 | } else if (max_prob_5_mm > END_SPLICESITE_PROB_MISMATCH) { |
2387 | 2442 | debug13(printf("Found good mismatch splice %s on 5' end at %u with probability %f\n", |
2388 | 2443 | Splicetype_string(splicetype5_mm),splice_genomepos_5_mm-chroffset,max_prob_5_mm)); |
2389 | 2444 | if (substring1->plusp) { |
2390 | *ambig_end_length_5 = splice_genomepos_5 - substring1->genomicstart; | |
2445 | *ambig_end_length_5 = splice_genomepos_5_mm - substring1->genomicstart; | |
2446 | debug13(printf("3 Set ambig_end_length_5 to be %d = %u - %u\n",*ambig_end_length_5,splice_genomepos_5_mm,substring1->genomicstart)); | |
2391 | 2447 | } else { |
2392 | *ambig_end_length_5 = substring1->genomicstart - splice_genomepos_5; | |
2448 | *ambig_end_length_5 = substring1->genomicstart - splice_genomepos_5_mm; | |
2449 | debug13(printf("4 Set ambig_end_length_5 to be %d = %u - %u\n",*ambig_end_length_5,substring1->genomicstart,splice_genomepos_5_mm)); | |
2393 | 2450 | } |
2394 | 2451 | *ambig_splicetype_5 = splicetype5_mm; |
2395 | 2452 | *ambig_prob_5 = max_prob_5_mm; |
2396 | debug13(printf("Set ambig_end_length_5 to be %d\n",*ambig_end_length_5)); | |
2397 | 2453 | } |
2398 | 2454 | } |
2399 | 2455 | |
2466 | 2522 | Splicetype_string(splicetype5),splice_genomepos_5-chroffset,max_prob_5)); |
2467 | 2523 | if (substring1->plusp) { |
2468 | 2524 | *ambig_end_length_5 = splice_genomepos_5 - substring1->genomicstart; |
2525 | debug13(printf("5 Set ambig_end_length_5 to be %d = %u - %u\n",*ambig_end_length_5,splice_genomepos_5,substring1->genomicstart)); | |
2469 | 2526 | } else { |
2470 | 2527 | *ambig_end_length_5 = substring1->genomicstart - splice_genomepos_5; |
2528 | debug13(printf("6 Set ambig_end_length_5 to be %d = %u - %u\n",*ambig_end_length_5,substring1->genomicstart,splice_genomepos_5)); | |
2471 | 2529 | } |
2472 | 2530 | *ambig_splicetype_5 = splicetype5; |
2473 | 2531 | *ambig_prob_5 = max_prob_5; |
2474 | 2532 | /* *cdna_direction = splice_cdna_direction_5; */ |
2475 | debug13(printf("Set ambig_end_length_5 to be %d\n",*ambig_end_length_5)); | |
2476 | 2533 | if (max_prob_sense_forward_5 >= END_SPLICESITE_PROB_MATCH && max_prob_sense_anti_5 < END_SPLICESITE_PROB_MATCH |
2477 | 2534 | && max_prob_sense_anti_3 < END_SPLICESITE_PROB_MATCH) { |
2478 | 2535 | *sensedir = splice_sensedir_5; |
2490 | 2547 | Splicetype_string(splicetype3_mm),splice_genomepos_3_mm-chroffset,max_prob_3_mm)); |
2491 | 2548 | if (substringN->plusp) { |
2492 | 2549 | *ambig_end_length_3 = substringN->genomicend - splice_genomepos_3_mm; |
2550 | debug13(printf("Set ambig_end_length_3 to be %d = %u - %u\n",*ambig_end_length_3,substringN->genomicend,splice_genomepos_3_mm)); | |
2493 | 2551 | } else { |
2494 | 2552 | *ambig_end_length_3 = splice_genomepos_3_mm - substringN->genomicend; |
2553 | debug13(printf("Set ambig_end_length_3 to be %d = %u - %u\n",*ambig_end_length_3,splice_genomepos_3_mm,substringN->genomicend)); | |
2495 | 2554 | } |
2496 | 2555 | *ambig_splicetype_3 = splicetype3_mm; |
2497 | 2556 | *ambig_prob_3 = max_prob_3_mm; |
2498 | 2557 | /* *cdna_direction = splice_cdna_direction_3_mm; */ |
2499 | debug13(printf("Set ambig_end_length_3 to be %d\n",*ambig_end_length_3)); | |
2500 | 2558 | if (max_prob_sense_forward_3_mm >= END_SPLICESITE_PROB_MISMATCH && max_prob_sense_anti_3_mm < END_SPLICESITE_PROB_MISMATCH |
2501 | 2559 | && max_prob_sense_anti_5_mm < END_SPLICESITE_PROB_MISMATCH) { |
2502 | 2560 | *sensedir = splice_sensedir_3_mm; |
2511 | 2569 | Splicetype_string(splicetype5_mm),splice_genomepos_5_mm-chroffset,max_prob_5_mm)); |
2512 | 2570 | if (substring1->plusp) { |
2513 | 2571 | *ambig_end_length_5 = splice_genomepos_5_mm - substring1->genomicstart; |
2572 | debug13(printf("7 Set ambig_end_length_5 to be %d = %u - %u\n",*ambig_end_length_5,splice_genomepos_5_mm,substring1->genomicstart)); | |
2514 | 2573 | } else { |
2515 | 2574 | *ambig_end_length_5 = substring1->genomicstart - splice_genomepos_5_mm; |
2575 | debug13(printf("8 Set ambig_end_length_5 to be %d = %u - %u\n",*ambig_end_length_5,substring1->genomicstart,splice_genomepos_5_mm)); | |
2516 | 2576 | } |
2517 | 2577 | *ambig_splicetype_5 = splicetype5_mm; |
2518 | 2578 | *ambig_prob_5 = max_prob_5_mm; |
2519 | 2579 | /* *cdna_direction = splice_cdna_direction_5_mm; */ |
2520 | debug13(printf("Set ambig_end_length_5 to be %d\n",*ambig_end_length_5)); | |
2521 | 2580 | if (max_prob_sense_forward_5_mm >= END_SPLICESITE_PROB_MISMATCH && max_prob_sense_anti_5_mm < END_SPLICESITE_PROB_MISMATCH |
2522 | 2581 | && max_prob_sense_anti_3_mm < END_SPLICESITE_PROB_MISMATCH) { |
2523 | 2582 | *sensedir = splice_sensedir_5_mm; |
2531 | 2590 | } |
2532 | 2591 | } |
2533 | 2592 | |
2534 | debug13(printf("Returning ambig_end_length_5 %d and ambig_end_length_3 %d\n",*ambig_end_length_5,*ambig_end_length_3)); | |
2593 | debug13(printf("Returning ambig_end_length_5 %d and ambig_end_length_3 %d, probs %f and %f\n", | |
2594 | *ambig_end_length_5,*ambig_end_length_3,*ambig_prob_5,*ambig_prob_3)); | |
2535 | 2595 | return; |
2536 | 2596 | } |
2537 | 2597 | |
2549 | 2609 | int outofbounds_start, int outofbounds_end, int minlength, int sensedir) { |
2550 | 2610 | T new; |
2551 | 2611 | int nmatches; |
2552 | int nonterminal_trim = 0; | |
2612 | /* int nonterminal_trim = 0; */ | |
2553 | 2613 | |
2554 | 2614 | int ambig_end_length_5, ambig_end_length_3; |
2555 | 2615 | Splicetype_T ambig_splicetype_5, ambig_splicetype_3; |
2556 | 2616 | double ambig_prob_5, ambig_prob_3; |
2557 | 2617 | int nmismatches_end_left, nmismatches_end_right; |
2618 | int trim; | |
2558 | 2619 | |
2559 | 2620 | |
2560 | 2621 | /* General test for goodness over original region */ |
2599 | 2660 | new->plusp = plusp; |
2600 | 2661 | new->genestrand = genestrand; |
2601 | 2662 | |
2602 | new->chimera_prob = 0.0; | |
2603 | new->chimera_knownp = false; | |
2604 | new->chimera_knownp_2 = false; | |
2605 | new->chimera_novelp = false; | |
2606 | new->chimera_novelp_2 = false; | |
2663 | new->splicecoord_D = new->splicecoord_A = new->splicecoord_N = 0; | |
2664 | new->siteD_pos = new->siteA_pos = new->siteN_pos = 0; | |
2665 | ||
2666 | new->siteD_prob = new->siteA_prob = 0.0; | |
2667 | new->siteD_knownp = new->siteA_knownp = false; | |
2668 | new->siteD_novelp = new->siteA_novelp = false; | |
2607 | 2669 | |
2608 | 2670 | debug2(printf("\n***Entered Substring_new with query %d..%d, chrnum %d (chroffset %u, chrhigh %u), plusp %d, outofbounds start %d and end %d\n", |
2609 | 2671 | querystart,queryend,chrnum,chroffset,chrhigh,plusp,outofbounds_start,outofbounds_end)); |
2617 | 2679 | new->genomicend = new->left + querylength; |
2618 | 2680 | |
2619 | 2681 | debug2(printf("left is %u\n",new->left)); |
2682 | debug2(printf("genomicstart is %u, genomicend is %u\n",new->genomicstart,new->genomicend)); | |
2620 | 2683 | debug2(printf("querylength is %d, alignstart is %u, alignend is %u\n",querylength,alignstart,alignend)); |
2621 | 2684 | assert(alignstart + outofbounds_start >= chroffset); |
2622 | 2685 | assert(alignend - outofbounds_end <= chrhigh); |
2627 | 2690 | new->genomicstart = new->left + querylength; |
2628 | 2691 | |
2629 | 2692 | debug2(printf("left is %u\n",new->left)); |
2693 | debug2(printf("genomicstart is %u, genomicend is %u\n",new->genomicstart,new->genomicend)); | |
2630 | 2694 | debug2(printf("querylength is %d, alignstart is %u, alignend is %u\n",querylength,alignstart,alignend)); |
2631 | 2695 | assert(alignstart - outofbounds_start <= chrhigh); |
2632 | 2696 | assert(alignend + outofbounds_end >= chroffset); |
2654 | 2718 | new->genomic_refdiff = (char *) NULL; |
2655 | 2719 | |
2656 | 2720 | /* Do trimming */ |
2657 | debug8(printf("trim_left_action %d, trim_right_action %d\n",trim_left_action,trim_right_action)); | |
2721 | debug8(printf("trim_left_action %s, trim_right_action %s\n", | |
2722 | Trimaction_string(trim_left_action),Trimaction_string(trim_right_action))); | |
2658 | 2723 | |
2659 | 2724 | new->mandatory_trim_left = 0; |
2660 | 2725 | new->mandatory_trim_right = 0; |
2669 | 2734 | Substring_free(&new); |
2670 | 2735 | return (T) NULL; |
2671 | 2736 | } else { |
2672 | new->trim_left = 0; | |
2737 | new->trim_left = querystart; | |
2673 | 2738 | } |
2674 | 2739 | |
2675 | 2740 | } else if (new->start_endtype == TERM) { |
2676 | 2741 | /* Accept true terminals generated by GSNAP procedure */ |
2677 | new->trim_left = trim_left_end(&nmismatches_end_left,query_compress,new->left,querystart,queryend,querylength, | |
2678 | plusp,genestrand,/*trim_mismatch_score*/-3); | |
2679 | if (outofbounds_start > new->trim_left) { | |
2680 | new->trim_left = outofbounds_start; | |
2681 | } | |
2682 | new->querystart += new->trim_left; | |
2683 | ||
2684 | } else { | |
2685 | new->trim_left = trim_left_end(&nmismatches_end_left,query_compress,new->left,querystart,queryend,querylength, | |
2686 | plusp,genestrand,trim_mismatch_score); | |
2687 | debug13(printf("trim_left %d, nmismatches_end_left = %d\n",new->trim_left,nmismatches_end_left)); | |
2688 | if (outofbounds_start > new->trim_left) { | |
2689 | new->trim_left = outofbounds_start; | |
2690 | } | |
2691 | nonterminal_trim += new->trim_left; | |
2692 | new->querystart += new->trim_left; | |
2742 | trim = trim_left_end(&nmismatches_end_left,query_compress,new->left,querystart,queryend,querylength, | |
2743 | plusp,genestrand,/*trim_mismatch_score*/-3); | |
2744 | debug8(printf("trim_left_end: trim_left +%d from querystart %d, nmismatches_end_left = %d\n", | |
2745 | trim,querystart,nmismatches_end_left)); | |
2746 | if (outofbounds_start > querystart + trim) { | |
2747 | trim = outofbounds_start - querystart; | |
2748 | } | |
2749 | new->querystart += trim; | |
2750 | new->trim_left = new->querystart; | |
2751 | debug8(printf("querystart is now %d\n",new->querystart)); | |
2752 | ||
2753 | } else { | |
2754 | trim = trim_left_end(&nmismatches_end_left,query_compress,new->left,querystart,queryend,querylength, | |
2755 | plusp,genestrand,trim_mismatch_score); | |
2756 | debug8(printf("trim_left_end: trim_left +%d from querystart %d, nmismatches_end_left = %d\n", | |
2757 | trim,querystart,nmismatches_end_left)); | |
2758 | if (outofbounds_start > querystart + trim) { | |
2759 | trim = outofbounds_start - querystart; | |
2760 | } | |
2761 | /* nonterminal_trim += new->trim_left; */ | |
2762 | new->querystart += trim; | |
2763 | new->trim_left = new->querystart; | |
2764 | debug8(printf("querystart is now %d\n",new->querystart)); | |
2693 | 2765 | } |
2694 | 2766 | |
2695 | 2767 | if (trim_right_action == PRE_TRIMMED) { |
2702 | 2774 | debug2(printf("outofbounds_end %d > 0, so returning NULL\n",outofbounds_end)); |
2703 | 2775 | return (T) NULL; |
2704 | 2776 | } else { |
2705 | new->trim_right = 0; | |
2777 | new->trim_right = querylength - queryend; | |
2706 | 2778 | } |
2707 | 2779 | |
2708 | 2780 | } else if (new->end_endtype == TERM) { |
2709 | 2781 | /* Accept true terminals generated by GSNAP procedure */ |
2710 | new->trim_right = trim_right_end(&nmismatches_end_right,query_compress,new->left,querystart,queryend,querylength, | |
2711 | plusp,genestrand,/*trim_mismatch_score*/-3); | |
2712 | if (outofbounds_end > new->trim_right) { | |
2713 | new->trim_right = outofbounds_end; | |
2714 | } | |
2715 | new->queryend -= new->trim_right; | |
2716 | ||
2717 | } else { | |
2718 | new->trim_right = trim_right_end(&nmismatches_end_right,query_compress,new->left,querystart,queryend,querylength, | |
2719 | plusp,genestrand,trim_mismatch_score); | |
2720 | debug13(printf("trim_right %d, nmismatches_end_right = %d\n",new->trim_right,nmismatches_end_right)); | |
2721 | if (outofbounds_end > new->trim_right) { | |
2722 | new->trim_right = outofbounds_end; | |
2723 | } | |
2724 | nonterminal_trim += new->trim_right; | |
2725 | new->queryend -= new->trim_right; | |
2782 | trim = trim_right_end(&nmismatches_end_right,query_compress,new->left,querystart,queryend,querylength, | |
2783 | plusp,genestrand,/*trim_mismatch_score*/-3); | |
2784 | debug8(printf("trim_right_end: trim_right +%d from queryend %d, nmismatches_end_right = %d\n", | |
2785 | trim,queryend,nmismatches_end_right)); | |
2786 | if (outofbounds_end > queryend - trim) { | |
2787 | trim = queryend - outofbounds_end; | |
2788 | } | |
2789 | new->queryend -= trim; | |
2790 | new->trim_right = querylength - new->queryend; | |
2791 | debug8(printf("queryend is now %d\n",new->queryend)); | |
2792 | ||
2793 | } else { | |
2794 | trim = trim_right_end(&nmismatches_end_right,query_compress,new->left,querystart,queryend,querylength, | |
2795 | plusp,genestrand,trim_mismatch_score); | |
2796 | debug8(printf("trim_right_end: trim_right +%d from queryend %d, nmismatches_end_right = %d\n", | |
2797 | trim,queryend,nmismatches_end_right)); | |
2798 | if (outofbounds_end > queryend - trim) { | |
2799 | trim = queryend - outofbounds_end; | |
2800 | } | |
2801 | /* nonterminal_trim += new->trim_right; */ | |
2802 | new->queryend -= trim; | |
2803 | new->trim_right = querylength - new->queryend; | |
2804 | debug8(printf("queryend is now %d\n",new->queryend)); | |
2726 | 2805 | } |
2727 | 2806 | |
2728 | 2807 | #if 0 |
2757 | 2836 | new->nmatches = (new->alignend - new->alignstart) - new->nmismatches_whole; |
2758 | 2837 | |
2759 | 2838 | if (trim_left_action == COMPUTE_TRIM) { |
2760 | if (nmismatches_end_left == 0) { | |
2839 | if (querystart == 0 && nmismatches_end_left == 0) { | |
2840 | debug8(printf("querystart is 0 and nmismatches_end_left is 0, so setting left_end_action to be NO_TRIM\n")); | |
2761 | 2841 | trim_left_action = NO_TRIM; |
2762 | 2842 | } else { |
2763 | new->alignstart_trim += new->trim_left; | |
2843 | new->alignstart_trim = new->genomicstart + new->trim_left; | |
2764 | 2844 | } |
2765 | 2845 | } |
2766 | 2846 | if (trim_right_action == COMPUTE_TRIM) { |
2767 | if (nmismatches_end_right == 0) { | |
2847 | if (queryend == querylength && nmismatches_end_right == 0) { | |
2848 | debug8(printf("queryend is querylength and nmismatches_end_right is 0, so setting right_end_action to be NO_TRIM\n")); | |
2768 | 2849 | trim_right_action = NO_TRIM; |
2769 | 2850 | } else { |
2770 | new->alignend_trim -= new->trim_right; | |
2851 | new->alignend_trim = new->genomicend - new->trim_right; | |
2771 | 2852 | } |
2772 | 2853 | } |
2773 | 2854 | debug2(printf("Got trims of %d and %d => Revised alignstart_trim and alignend_trim to be %u..%u (%u..%u)\n", |
2774 | 2855 | new->trim_left,new->trim_right,new->alignstart_trim,new->alignend_trim, |
2775 | 2856 | new->alignstart_trim - new->chroffset,new->alignend_trim - new->chroffset)); |
2857 | debug2(printf("genomicstart is %u, genomicend is %u\n",new->genomicstart,new->genomicend)); | |
2776 | 2858 | |
2777 | 2859 | new->trim_left_splicep = new->trim_right_splicep = false; |
2778 | 2860 | if (novelsplicingp == true) { |
2782 | 2864 | &sensedir,chroffset); |
2783 | 2865 | if (ambig_end_length_5 > 0) { |
2784 | 2866 | new->trim_left_splicep = true; |
2785 | new->querystart += (ambig_end_length_5 - new->trim_left); | |
2786 | new->alignstart_trim += (ambig_end_length_5 - new->trim_left); | |
2867 | /* new->querystart += (ambig_end_length_5 - new->trim_left); */ | |
2868 | /* new->alignstart_trim += (ambig_end_length_5 - new->trim_left); */ | |
2869 | new->querystart = ambig_end_length_5; | |
2870 | new->alignstart_trim = new->genomicstart + ambig_end_length_5; | |
2871 | ||
2787 | 2872 | new->trim_left = ambig_end_length_5; |
2788 | 2873 | if (ambig_splicetype_5 == DONOR || ambig_splicetype_5 == ANTIDONOR) { |
2789 | 2874 | new->start_endtype = DON; |
2875 | new->siteD_prob = ambig_prob_5; | |
2790 | 2876 | } else { |
2791 | 2877 | new->start_endtype = ACC; |
2878 | new->siteA_prob = ambig_prob_5; | |
2792 | 2879 | } |
2793 | new->chimera_prob = ambig_prob_5; | |
2794 | 2880 | } |
2795 | 2881 | if (ambig_end_length_3 > 0) { |
2796 | 2882 | new->trim_right_splicep = true; |
2797 | new->queryend -= (ambig_end_length_3 - new->trim_right); | |
2798 | new->alignend_trim -= (ambig_end_length_3 - new->trim_right); | |
2883 | /* new->queryend -= (ambig_end_length_3 - new->trim_right); */ | |
2884 | /* new->alignend_trim -= (ambig_end_length_3 - new->trim_right); */ | |
2885 | new->queryend = querylength - ambig_end_length_3; | |
2886 | new->alignend_trim = new->genomicend - ambig_end_length_3; | |
2887 | ||
2799 | 2888 | new->trim_right = ambig_end_length_3; |
2800 | 2889 | if (ambig_splicetype_3 == DONOR || ambig_splicetype_3 == ANTIDONOR) { |
2801 | 2890 | new->end_endtype = DON; |
2891 | new->siteD_prob = ambig_prob_3; | |
2802 | 2892 | } else { |
2803 | 2893 | new->end_endtype = ACC; |
2894 | new->siteA_prob = ambig_prob_3; | |
2804 | 2895 | } |
2805 | new->chimera_prob_2 = ambig_prob_3; | |
2806 | 2896 | } |
2807 | 2897 | |
2808 | 2898 | } else if (trim_left_action == COMPUTE_TRIM) { |
2811 | 2901 | &sensedir,chroffset); |
2812 | 2902 | if (ambig_end_length_5 > 0) { |
2813 | 2903 | new->trim_left_splicep = true; |
2814 | new->querystart += (ambig_end_length_5 - new->trim_left); | |
2815 | new->alignstart_trim += (ambig_end_length_5 - new->trim_left); | |
2904 | /* new->querystart += (ambig_end_length_5 - new->trim_left); */ | |
2905 | /* new->alignstart_trim += (ambig_end_length_5 - new->trim_left); */ | |
2906 | new->querystart = ambig_end_length_5; | |
2907 | new->alignstart_trim = new->genomicstart + ambig_end_length_5; | |
2908 | ||
2816 | 2909 | new->trim_left = ambig_end_length_5; |
2817 | 2910 | if (ambig_splicetype_5 == DONOR || ambig_splicetype_5 == ANTIDONOR) { |
2818 | 2911 | new->start_endtype = DON; |
2912 | new->siteD_prob = ambig_prob_5; | |
2819 | 2913 | } else { |
2820 | 2914 | new->start_endtype = ACC; |
2915 | new->siteA_prob = ambig_prob_5; | |
2821 | 2916 | } |
2822 | new->chimera_prob = ambig_prob_5; | |
2823 | 2917 | } |
2824 | 2918 | |
2825 | 2919 | } else if (trim_right_action == COMPUTE_TRIM) { |
2828 | 2922 | &sensedir,chroffset); |
2829 | 2923 | if (ambig_end_length_3 > 0) { |
2830 | 2924 | new->trim_right_splicep = true; |
2831 | new->queryend -= (ambig_end_length_3 - new->trim_right); | |
2832 | new->alignend_trim -= (ambig_end_length_3 - new->trim_right); | |
2925 | /* new->queryend -= (ambig_end_length_3 - new->trim_right); */ | |
2926 | /* new->alignend_trim -= (ambig_end_length_3 - new->trim_right); */ | |
2927 | new->queryend = querylength - ambig_end_length_3; | |
2928 | new->alignend_trim = new->genomicend - ambig_end_length_3; | |
2929 | ||
2833 | 2930 | new->trim_right = ambig_end_length_3; |
2834 | 2931 | if (ambig_splicetype_3 == DONOR || ambig_splicetype_3 == ANTIDONOR) { |
2835 | 2932 | new->end_endtype = DON; |
2933 | new->siteD_prob = ambig_prob_3; | |
2836 | 2934 | } else { |
2837 | 2935 | new->end_endtype = ACC; |
2936 | new->siteA_prob = ambig_prob_3; | |
2838 | 2937 | } |
2839 | new->chimera_prob_2 = ambig_prob_3; | |
2840 | 2938 | } |
2841 | 2939 | } |
2842 | 2940 | } |
2846 | 2944 | new->nmatches = (new->alignstart - new->alignend) - new->nmismatches_whole; |
2847 | 2945 | |
2848 | 2946 | if (trim_left_action == COMPUTE_TRIM) { |
2849 | if (nmismatches_end_left == 0) { | |
2947 | if (querystart == 0 && nmismatches_end_left == 0) { | |
2948 | debug8(printf("querystart is 0 and nmismatches_end_left is 0, so setting left_end_action to be NO_TRIM\n")); | |
2850 | 2949 | trim_left_action = NO_TRIM; |
2851 | 2950 | } else { |
2852 | new->alignstart_trim -= new->trim_left; | |
2951 | new->alignstart_trim = new->genomicstart - new->trim_left; | |
2853 | 2952 | } |
2854 | 2953 | } |
2855 | 2954 | if (trim_right_action == COMPUTE_TRIM) { |
2856 | if (nmismatches_end_right == 0) { | |
2955 | if (queryend == querylength && nmismatches_end_right == 0) { | |
2956 | debug8(printf("queryend is querylength and nmismatches_end_right is 0, so setting right_end_action to be NO_TRIM\n")); | |
2857 | 2957 | trim_right_action = NO_TRIM; |
2858 | 2958 | } else { |
2859 | new->alignend_trim += new->trim_right; | |
2959 | new->alignend_trim = new->genomicend + new->trim_right; | |
2860 | 2960 | } |
2861 | 2961 | } |
2862 | 2962 | debug2(printf("Revised alignstart_trim and alignend_trim to be %u..%u (%u..%u)\n", |
2871 | 2971 | &sensedir,chroffset); |
2872 | 2972 | if (ambig_end_length_5 > 0) { |
2873 | 2973 | new->trim_left_splicep = true; |
2874 | new->querystart += (ambig_end_length_5 - new->trim_left); | |
2875 | new->alignstart_trim -= (ambig_end_length_5 - new->trim_left); | |
2974 | /* new->querystart += (ambig_end_length_5 - new->trim_left); */ | |
2975 | /* new->alignstart_trim -= (ambig_end_length_5 - new->trim_left); */ | |
2976 | new->querystart = ambig_end_length_5; | |
2977 | new->alignstart_trim = new->genomicstart - ambig_end_length_5; | |
2978 | ||
2876 | 2979 | new->trim_left = ambig_end_length_5; |
2877 | 2980 | if (ambig_splicetype_5 == DONOR || ambig_splicetype_5 == ANTIDONOR) { |
2878 | 2981 | new->start_endtype = DON; |
2982 | new->siteD_prob = ambig_prob_5; | |
2879 | 2983 | } else { |
2880 | 2984 | new->start_endtype = ACC; |
2985 | new->siteA_prob = ambig_prob_5; | |
2881 | 2986 | } |
2882 | new->chimera_prob = ambig_prob_5; | |
2883 | 2987 | } |
2884 | 2988 | if (ambig_end_length_3 > 0) { |
2885 | 2989 | new->trim_right_splicep = true; |
2886 | new->queryend -= (ambig_end_length_3 - new->trim_right); | |
2887 | new->alignend_trim += (ambig_end_length_3 - new->trim_right); | |
2990 | /* new->queryend -= (ambig_end_length_3 - new->trim_right); */ | |
2991 | /* new->alignend_trim += (ambig_end_length_3 - new->trim_right); */ | |
2992 | new->queryend = querylength - ambig_end_length_3; | |
2993 | new->alignend_trim = new->genomicend + ambig_end_length_3; | |
2994 | ||
2888 | 2995 | new->trim_right = ambig_end_length_3; |
2889 | 2996 | if (ambig_splicetype_3 == DONOR || ambig_splicetype_3 == ANTIDONOR) { |
2890 | 2997 | new->end_endtype = DON; |
2998 | new->siteD_prob = ambig_prob_3; | |
2891 | 2999 | } else { |
2892 | 3000 | new->end_endtype = ACC; |
3001 | new->siteA_prob = ambig_prob_3; | |
2893 | 3002 | } |
2894 | new->chimera_prob_2 = ambig_prob_3; | |
2895 | 3003 | } |
2896 | 3004 | |
2897 | 3005 | } else if (trim_left_action == COMPUTE_TRIM) { |
2900 | 3008 | &sensedir,chroffset); |
2901 | 3009 | if (ambig_end_length_5 > 0) { |
2902 | 3010 | new->trim_left_splicep = true; |
2903 | new->querystart += (ambig_end_length_5 - new->trim_left); | |
2904 | new->alignstart_trim -= (ambig_end_length_5 - new->trim_left); | |
3011 | /* new->querystart += (ambig_end_length_5 - new->trim_left); */ | |
3012 | /* new->alignstart_trim -= (ambig_end_length_5 - new->trim_left); */ | |
3013 | new->querystart = ambig_end_length_5; | |
3014 | new->alignstart_trim = new->genomicstart - ambig_end_length_5; | |
3015 | ||
2905 | 3016 | new->trim_left = ambig_end_length_5; |
2906 | 3017 | if (ambig_splicetype_5 == DONOR || ambig_splicetype_5 == ANTIDONOR) { |
2907 | 3018 | new->start_endtype = DON; |
3019 | new->siteD_prob = ambig_prob_5; | |
2908 | 3020 | } else { |
2909 | 3021 | new->start_endtype = ACC; |
3022 | new->siteA_prob = ambig_prob_5; | |
2910 | 3023 | } |
2911 | new->chimera_prob = ambig_prob_5; | |
2912 | 3024 | } |
2913 | 3025 | |
2914 | 3026 | } else if (trim_right_action == COMPUTE_TRIM) { |
2917 | 3029 | &sensedir,chroffset); |
2918 | 3030 | if (ambig_end_length_3 > 0) { |
2919 | 3031 | new->trim_right_splicep = true; |
2920 | new->queryend -= (ambig_end_length_3 - new->trim_right); | |
2921 | new->alignend_trim += (ambig_end_length_3 - new->trim_right); | |
3032 | /* new->queryend -= (ambig_end_length_3 - new->trim_right); */ | |
3033 | /* new->alignend_trim += (ambig_end_length_3 - new->trim_right); */ | |
3034 | new->queryend = querylength - ambig_end_length_3; | |
3035 | new->alignend_trim = new->genomicend + ambig_end_length_3; | |
3036 | ||
2922 | 3037 | new->trim_right = ambig_end_length_3; |
2923 | 3038 | if (ambig_splicetype_3 == DONOR || ambig_splicetype_3 == ANTIDONOR) { |
2924 | 3039 | new->end_endtype = DON; |
3040 | new->siteD_prob = ambig_prob_3; | |
2925 | 3041 | } else { |
2926 | 3042 | new->end_endtype = ACC; |
3043 | new->siteA_prob = ambig_prob_3; | |
2927 | 3044 | } |
2928 | new->chimera_prob_2 = ambig_prob_3; | |
2929 | 3045 | } |
2930 | 3046 | } |
2931 | 3047 | } |
2963 | 3079 | new->amb_knowni = (int *) NULL; |
2964 | 3080 | new->amb_nmismatches = (int *) NULL; |
2965 | 3081 | new->amb_probs = (double *) NULL; |
2966 | new->amb_common_prob = 0.0; | |
2967 | new->amb_donor_common_p = false; | |
2968 | ||
2969 | debug2(printf("Returning substring %p\n",new)); | |
3082 | new->amb_type = END; | |
3083 | ||
3084 | debug2(printf("Returning substring %p, query %d..%d, trim %d..%d\n", | |
3085 | new,new->querystart,new->queryend,new->trim_left,new->trim_right)); | |
2970 | 3086 | return new; |
2971 | 3087 | } |
2972 | 3088 | |
2973 | 3089 | |
2974 | 3090 | T |
2975 | Substring_new_ambig (int querystart, int queryend, int splice_pos, int querylength, | |
2976 | Chrnum_T chrnum, Univcoord_T chroffset, | |
2977 | Univcoord_T chrhigh, Chrpos_T chrlength, | |
2978 | bool plusp, int genestrand, | |
3091 | Substring_new_ambig_D (int querystart, int queryend, int splice_pos, int querylength, | |
3092 | Chrnum_T chrnum, Univcoord_T chroffset, | |
3093 | Univcoord_T chrhigh, Chrpos_T chrlength, | |
3094 | bool plusp, int genestrand, | |
2979 | 3095 | #ifdef LARGE_GENOMES |
2980 | Uint8list_T ambcoords, | |
3096 | Uint8list_T ambcoords, | |
2981 | 3097 | #else |
2982 | Uintlist_T ambcoords, | |
2983 | #endif | |
2984 | Intlist_T amb_knowni, Intlist_T amb_nmismatches, Doublelist_T amb_probs, | |
2985 | double amb_common_prob, bool amb_donor_common_p, bool substring1p) { | |
3098 | Uintlist_T ambcoords, | |
3099 | #endif | |
3100 | Intlist_T amb_knowni, Intlist_T amb_nmismatches, Doublelist_T amb_probs, | |
3101 | double amb_common_prob, bool substring1p) { | |
2986 | 3102 | int ignore; |
2987 | 3103 | T new = (T) MALLOC_OUT(sizeof(*new)); |
2988 | 3104 | |
2989 | debug2(printf("Entered Substring_new_ambig with chrnum %d (chroffset %u, chrhigh %u), %d..%d, querylength %d, plusp %d\n", | |
3105 | debug2(printf("Entered Substring_new_ambig_D with chrnum %d (chroffset %u, chrhigh %u), %d..%d, querylength %d, plusp %d\n", | |
2990 | 3106 | chrnum,chroffset,chrhigh,querystart,queryend,querylength,plusp)); |
2991 | 3107 | |
2992 | 3108 | new->exactp = false; |
3019 | 3135 | |
3020 | 3136 | new->querystart_orig = new->querystart = querystart; |
3021 | 3137 | new->queryend_orig = new->queryend = queryend; |
3022 | new->amb_splice_pos = splice_pos; | |
3023 | 3138 | new->querylength = querylength; |
3024 | 3139 | |
3025 | 3140 | new->alignstart = new->alignstart_trim = 0; |
3028 | 3143 | new->plusp = plusp; |
3029 | 3144 | new->genestrand = genestrand; |
3030 | 3145 | |
3031 | new->chimera_prob = 0.0; | |
3032 | new->chimera_knownp = false; | |
3033 | new->chimera_knownp_2 = false; | |
3034 | new->chimera_novelp = false; | |
3035 | new->chimera_novelp_2 = false; | |
3146 | new->siteD_knownp = new->siteA_knownp = false; | |
3147 | new->siteD_novelp = new->siteA_novelp = false; | |
3148 | ||
3149 | new->siteD_prob = 0.0; | |
3150 | new->siteA_prob = amb_common_prob; | |
3036 | 3151 | |
3037 | 3152 | new->nmismatches_bothdiff = new->nmismatches_whole = Intlist_min(amb_nmismatches); |
3038 | 3153 | |
3074 | 3189 | new->amb_knowni = Intlist_to_array_out(&ignore,amb_knowni); |
3075 | 3190 | new->amb_nmismatches = Intlist_to_array_out(&ignore,amb_nmismatches); |
3076 | 3191 | new->amb_probs = Doublelist_to_array_out(&ignore,amb_probs); |
3077 | new->amb_common_prob = amb_common_prob; | |
3078 | new->amb_donor_common_p = amb_donor_common_p; | |
3192 | new->amb_splice_pos = splice_pos; | |
3193 | new->amb_type = DON; | |
3194 | ||
3195 | return new; | |
3196 | } | |
3197 | ||
3198 | T | |
3199 | Substring_new_ambig_A (int querystart, int queryend, int splice_pos, int querylength, | |
3200 | Chrnum_T chrnum, Univcoord_T chroffset, | |
3201 | Univcoord_T chrhigh, Chrpos_T chrlength, | |
3202 | bool plusp, int genestrand, | |
3203 | #ifdef LARGE_GENOMES | |
3204 | Uint8list_T ambcoords, | |
3205 | #else | |
3206 | Uintlist_T ambcoords, | |
3207 | #endif | |
3208 | Intlist_T amb_knowni, Intlist_T amb_nmismatches, Doublelist_T amb_probs, | |
3209 | double amb_common_prob, bool substring1p) { | |
3210 | int ignore; | |
3211 | T new = (T) MALLOC_OUT(sizeof(*new)); | |
3212 | ||
3213 | debug2(printf("Entered Substring_new_ambig_A with chrnum %d (chroffset %u, chrhigh %u), %d..%d, querylength %d, plusp %d\n", | |
3214 | chrnum,chroffset,chrhigh,querystart,queryend,querylength,plusp)); | |
3215 | ||
3216 | new->exactp = false; | |
3217 | ||
3218 | new->chrnum = chrnum; | |
3219 | new->chroffset = chroffset; | |
3220 | new->chrhigh = chrhigh; | |
3221 | new->chrlength = chrlength; | |
3222 | ||
3223 | new->left = 0; | |
3224 | #ifdef LARGE_GENOMES | |
3225 | if (plusp == true) { | |
3226 | new->genomicstart = Uint8list_max(ambcoords); | |
3227 | new->genomicend = Uint8list_min(ambcoords); | |
3228 | } else { | |
3229 | new->genomicstart = Uint8list_min(ambcoords); | |
3230 | new->genomicend = Uint8list_max(ambcoords); | |
3231 | } | |
3232 | #else | |
3233 | if (plusp == true) { | |
3234 | new->genomicstart = Uintlist_max(ambcoords); | |
3235 | new->genomicend = Uintlist_min(ambcoords); | |
3236 | } else { | |
3237 | new->genomicstart = Uintlist_min(ambcoords); | |
3238 | new->genomicend = Uintlist_max(ambcoords); | |
3239 | } | |
3240 | #endif | |
3241 | new->start_endtype = END; | |
3242 | new->end_endtype = END; | |
3243 | ||
3244 | new->querystart_orig = new->querystart = querystart; | |
3245 | new->queryend_orig = new->queryend = queryend; | |
3246 | new->querylength = querylength; | |
3247 | ||
3248 | new->alignstart = new->alignstart_trim = 0; | |
3249 | new->alignend = new->alignend_trim = 0; | |
3250 | ||
3251 | new->plusp = plusp; | |
3252 | new->genestrand = genestrand; | |
3253 | ||
3254 | new->siteD_knownp = new->siteA_knownp = false; | |
3255 | new->siteD_novelp = new->siteA_novelp = false; | |
3256 | ||
3257 | new->siteA_prob = 0.0; | |
3258 | new->siteD_prob = amb_common_prob; | |
3259 | ||
3260 | new->nmismatches_bothdiff = new->nmismatches_whole = Intlist_min(amb_nmismatches); | |
3261 | ||
3262 | #if 0 | |
3263 | if (plusp == true) { | |
3264 | /* Fails because alignstart and alignend are not known */ | |
3265 | new->nmatches = (new->alignend_trim - new->alignstart_trim) - new->nmismatches_whole; | |
3266 | } else { | |
3267 | new->alignoffset = querylength - queryend; | |
3268 | /* Fails because alignstart and alignend are not known */ | |
3269 | new->nmatches = (new->alignstart_trim - new->alignend_trim) - new->nmismatches_whole; | |
3270 | } | |
3271 | #endif | |
3272 | new->nmatches = (queryend - querystart) - new->nmismatches_whole; | |
3273 | ||
3274 | new->genomic_bothdiff = (char *) NULL; | |
3275 | new->genomic_refdiff = (char *) NULL; | |
3276 | if (substring1p == true) { | |
3277 | debug2(printf("substring1p is true, so setting trims to be %d and %d\n",querystart,0)); | |
3278 | new->trim_left = querystart; | |
3279 | new->trim_right = 0; | |
3280 | } else { | |
3281 | debug2(printf("substring1p is false, so setting trims to be %d and %d\n",0,querylength - queryend)); | |
3282 | new->trim_left = 0; | |
3283 | new->trim_right = querylength - queryend; | |
3284 | } | |
3285 | new->mandatory_trim_left = 0; | |
3286 | new->mandatory_trim_right = 0; | |
3287 | new->trim_left_splicep = new->trim_right_splicep = false; | |
3288 | ||
3289 | ||
3290 | new->ambiguous_p = true; | |
3291 | #ifdef LARGE_GENOMES | |
3292 | new->ambcoords = Uint8list_to_array_out(&new->nambcoords,ambcoords); | |
3293 | #else | |
3294 | new->ambcoords = Uintlist_to_array_out(&new->nambcoords,ambcoords); | |
3295 | debug2(printf("ambcoords: %s\n",Uintlist_to_string(ambcoords))); | |
3296 | #endif | |
3297 | new->amb_knowni = Intlist_to_array_out(&ignore,amb_knowni); | |
3298 | new->amb_nmismatches = Intlist_to_array_out(&ignore,amb_nmismatches); | |
3299 | new->amb_probs = Doublelist_to_array_out(&ignore,amb_probs); | |
3300 | new->amb_splice_pos = splice_pos; | |
3301 | new->amb_type = ACC; | |
3079 | 3302 | |
3080 | 3303 | return new; |
3081 | 3304 | } |
3108 | 3331 | Substring_set_unambiguous (double *donor_prob, double *acceptor_prob, Univcoord_T *genomicstart, Univcoord_T *genomicend, |
3109 | 3332 | T this, int bingoi) { |
3110 | 3333 | |
3111 | debug2(printf("Entered Substring_set_unambiguous\n")); | |
3112 | ||
3113 | this->splicecoord = this->ambcoords[bingoi]; | |
3114 | this->splicesites_knowni = this->amb_knowni[bingoi]; | |
3334 | #ifdef DEBUG2 | |
3335 | printf("Entered Substring_set_unambiguous. plusp %d",this->plusp); | |
3336 | if (this->amb_type == DON) { | |
3337 | printf("type DON\n"); | |
3338 | } else { | |
3339 | printf("type ACC\n"); | |
3340 | } | |
3341 | #endif | |
3342 | ||
3115 | 3343 | this->nmismatches_whole = this->amb_nmismatches[bingoi]; |
3116 | this->chimera_prob = this->amb_probs[bingoi]; | |
3117 | 3344 | |
3118 | 3345 | if (this->plusp == true) { |
3119 | this->left = this->splicecoord - this->amb_splice_pos; | |
3346 | if (this->amb_type == DON) { | |
3347 | *acceptor_prob = this->siteA_prob; | |
3348 | *donor_prob = this->siteD_prob = this->amb_probs[bingoi]; | |
3349 | this->splicecoord_D = this->ambcoords[bingoi]; | |
3350 | this->splicesitesD_knowni = this->amb_knowni[bingoi]; | |
3351 | this->left = this->splicecoord_D - this->amb_splice_pos; | |
3352 | } else { | |
3353 | *donor_prob = this->siteD_prob; | |
3354 | *acceptor_prob = this->siteA_prob = this->amb_probs[bingoi]; | |
3355 | this->splicecoord_A = this->ambcoords[bingoi]; | |
3356 | this->splicesitesA_knowni = this->amb_knowni[bingoi]; | |
3357 | this->left = this->splicecoord_A - this->amb_splice_pos; | |
3358 | } | |
3359 | ||
3120 | 3360 | debug2(printf("left %u\n",this->left)); |
3121 | 3361 | *genomicstart = this->genomicstart = this->left; |
3122 | 3362 | *genomicend = this->genomicend = this->left + this->querylength; |
3129 | 3369 | this->alignend,this->alignend - this->chroffset,this->genomicstart,this->genomicend)); |
3130 | 3370 | |
3131 | 3371 | } else { |
3132 | this->left = this->splicecoord - (this->querylength - this->amb_splice_pos); | |
3372 | if (this->amb_type == DON) { | |
3373 | *acceptor_prob = this->siteA_prob; | |
3374 | *donor_prob = this->siteD_prob = this->amb_probs[bingoi]; | |
3375 | this->splicecoord_D = this->ambcoords[bingoi]; | |
3376 | this->splicesitesD_knowni = this->amb_knowni[bingoi]; | |
3377 | this->left = this->splicecoord_D - (this->querylength - this->amb_splice_pos); | |
3378 | } else { | |
3379 | *donor_prob = this->siteD_prob; | |
3380 | *acceptor_prob = this->siteA_prob = this->amb_probs[bingoi]; | |
3381 | this->splicecoord_A = this->ambcoords[bingoi]; | |
3382 | this->splicesitesA_knowni = this->amb_knowni[bingoi]; | |
3383 | this->left = this->splicecoord_A - (this->querylength - this->amb_splice_pos); | |
3384 | } | |
3385 | ||
3133 | 3386 | debug2(printf("left %u\n",this->left)); |
3134 | 3387 | *genomicend = this->genomicend = this->left; |
3135 | 3388 | *genomicstart = this->genomicstart = this->left + this->querylength; |
3140 | 3393 | debug2(printf("querypos %d..%d, alignstart is %u (%u), alignend is %u (%u), genomicstart is %u, genomicend is %u\n", |
3141 | 3394 | this->querystart,this->queryend,this->alignstart,this->alignstart - this->chroffset, |
3142 | 3395 | this->alignend,this->alignend - this->chroffset,this->genomicstart,this->genomicend)); |
3143 | } | |
3144 | ||
3145 | if (this->amb_donor_common_p == true) { | |
3146 | *donor_prob = this->amb_common_prob; | |
3147 | *acceptor_prob = this->amb_probs[bingoi]; | |
3148 | } else { | |
3149 | *acceptor_prob = this->amb_common_prob; | |
3150 | *donor_prob = this->amb_probs[bingoi]; | |
3151 | 3396 | } |
3152 | 3397 | |
3153 | 3398 | this->ambiguous_p = false; |
3236 | 3481 | int extraleft, int extraright, |
3237 | 3482 | Compress_T query_compress_fwd, Compress_T query_compress_rev, |
3238 | 3483 | Genome_T genome) { |
3239 | char *genomic_diff; | |
3240 | char *gbuffer; | |
3241 | #ifndef HAVE_ALLOCA | |
3484 | ||
3485 | #if defined(LONG_READLENGTHS) | |
3486 | char *genomic_diff, *gbuffer; | |
3487 | #elif defined(HAVE_ALLOCA) | |
3488 | char *genomic_diff, *gbuffer; | |
3489 | #else | |
3490 | char *genomic_diff, *gbuffer; | |
3242 | 3491 | char gbuffer_alloc[MAX_READLENGTH/*+MAX_END_DELETIONS*/+1]; |
3243 | 3492 | bool allocp; |
3244 | 3493 | #endif |
3258 | 3507 | |
3259 | 3508 | } else { |
3260 | 3509 | /* Used to be this->genomiclength, but doesn't work for large insertions */ |
3261 | #ifdef HAVE_ALLOCA | |
3510 | #if defined(LONG_READLENGTHS) | |
3511 | gbuffer = (char *) MALLOC((querylength+1) * sizeof(char)); | |
3512 | #elif defined(HAVE_ALLOCA) | |
3262 | 3513 | gbuffer = (char *) ALLOCA((querylength+1) * sizeof(char)); |
3263 | 3514 | #else |
3264 | 3515 | if (querylength < MAX_READLENGTH) { |
3311 | 3562 | |
3312 | 3563 | if (0 && this->exactp == true && extraleft == 0 && extraright == 0) { |
3313 | 3564 | } else { |
3314 | #ifdef HAVE_ALLOCA | |
3565 | #if defined(LONG_READLENGTHS) | |
3566 | FREE(gbuffer); | |
3567 | #elif defined(HAVE_ALLOCA) | |
3315 | 3568 | FREEA(gbuffer); |
3316 | 3569 | #else |
3317 | 3570 | if (allocp == true) { |
3329 | 3582 | |
3330 | 3583 | } else { |
3331 | 3584 | /* Used to be this->genomiclength, but doesn't work for large insertions */ |
3332 | #ifdef HAVE_ALLOCA | |
3585 | #if defined(LONG_READLENGTHS) | |
3586 | gbuffer = (char *) MALLOC((querylength+1) * sizeof(char)); | |
3587 | #elif defined(HAVE_ALLOCA) | |
3333 | 3588 | gbuffer = (char *) ALLOCA((querylength+1) * sizeof(char)); |
3334 | 3589 | #else |
3335 | 3590 | if (querylength < MAX_READLENGTH) { |
3385 | 3640 | |
3386 | 3641 | if (0 && this->exactp == true && extraleft == 0 && extraright == 0) { |
3387 | 3642 | } else { |
3388 | #ifdef HAVE_ALLOCA | |
3643 | #if defined(LONG_READLENGTHS) | |
3644 | FREE(gbuffer); | |
3645 | #elif defined(HAVE_ALLOCA) | |
3389 | 3646 | FREEA(gbuffer); |
3390 | 3647 | #else |
3391 | 3648 | if (allocp == true) { |
3404 | 3661 | return this->left; |
3405 | 3662 | } |
3406 | 3663 | |
3664 | ||
3407 | 3665 | Univcoord_T |
3408 | Substring_splicecoord (T this) { | |
3409 | return this->splicecoord; | |
3410 | } | |
3411 | ||
3412 | Chrpos_T | |
3413 | Substring_chr_splicecoord (T this) { | |
3414 | return (Chrpos_T) (this->splicecoord - this->chroffset); | |
3415 | } | |
3416 | ||
3417 | int | |
3418 | Substring_splicesites_knowni (T this) { | |
3419 | return this->splicesites_knowni; | |
3666 | Substring_splicecoord_D (T this) { | |
3667 | return this->splicecoord_D; | |
3420 | 3668 | } |
3421 | 3669 | |
3422 | 3670 | Univcoord_T |
3423 | 3671 | Substring_splicecoord_A (T this) { |
3424 | return this->splicecoord; | |
3425 | } | |
3426 | ||
3427 | Univcoord_T | |
3428 | Substring_splicecoord_D (T this) { | |
3429 | return this->splicecoord_2; | |
3672 | return this->splicecoord_A; | |
3673 | } | |
3674 | ||
3675 | Chrpos_T | |
3676 | Substring_chr_splicecoord_D (T this) { | |
3677 | return (Chrpos_T) (this->splicecoord_D - this->chroffset); | |
3678 | } | |
3679 | ||
3680 | Chrpos_T | |
3681 | Substring_chr_splicecoord_A (T this) { | |
3682 | return (Chrpos_T) (this->splicecoord_A - this->chroffset); | |
3683 | } | |
3684 | ||
3685 | int | |
3686 | Substring_splicesitesD_knowni (T this) { | |
3687 | return this->splicesitesD_knowni; | |
3688 | } | |
3689 | ||
3690 | int | |
3691 | Substring_splicesitesA_knowni (T this) { | |
3692 | return this->splicesitesA_knowni; | |
3430 | 3693 | } |
3431 | 3694 | |
3432 | 3695 | bool |
3743 | 4006 | double max; |
3744 | 4007 | int i; |
3745 | 4008 | |
3746 | if (this->amb_donor_common_p == true) { | |
3747 | return this->amb_common_prob; | |
4009 | if (this->amb_type == DON) { | |
4010 | return this->siteD_prob; | |
3748 | 4011 | } else { |
3749 | 4012 | max = this->amb_probs[0]; |
3750 | 4013 | for (i = 1; i < this->nambcoords; i++) { |
3761 | 4024 | double max; |
3762 | 4025 | int i; |
3763 | 4026 | |
3764 | if (this->amb_donor_common_p == true) { | |
4027 | if (this->amb_type == ACC) { | |
4028 | return this->siteA_prob; | |
4029 | } else { | |
3765 | 4030 | max = this->amb_probs[0]; |
3766 | 4031 | for (i = 1; i < this->nambcoords; i++) { |
3767 | 4032 | if (this->amb_probs[i] > max) { |
3769 | 4034 | } |
3770 | 4035 | } |
3771 | 4036 | return max; |
3772 | } else { | |
3773 | return this->amb_common_prob; | |
3774 | } | |
3775 | } | |
3776 | ||
3777 | ||
4037 | } | |
4038 | } | |
4039 | ||
4040 | ||
4041 | ||
4042 | double | |
4043 | Substring_siteD_prob (T this) { | |
4044 | return this->siteD_prob; | |
4045 | } | |
3778 | 4046 | |
3779 | 4047 | double |
3780 | 4048 | Substring_siteA_prob (T this) { |
3781 | 4049 | return this->siteA_prob; |
3782 | 4050 | } |
3783 | 4051 | |
3784 | double | |
3785 | Substring_siteD_prob (T this) { | |
3786 | return this->siteD_prob; | |
3787 | } | |
3788 | ||
3789 | ||
3790 | double | |
3791 | Substring_chimera_prob (T this) { | |
3792 | return this->chimera_prob; | |
3793 | } | |
3794 | ||
3795 | double | |
3796 | Substring_chimera_prob_2 (T this) { | |
3797 | return this->chimera_prob_2; | |
3798 | } | |
3799 | 4052 | |
3800 | 4053 | int |
3801 | Substring_chimera_pos (T this) { | |
3802 | return this->chimera_pos; | |
3803 | } | |
3804 | ||
3805 | /* For shortexon */ | |
4054 | Substring_siteD_pos (T this) { | |
4055 | return this->siteD_pos; | |
4056 | } | |
4057 | ||
3806 | 4058 | int |
3807 | Substring_chimera_pos_A (T this) { | |
3808 | return this->chimera_pos; | |
3809 | } | |
3810 | ||
3811 | /* For shortexon */ | |
4059 | Substring_siteA_pos (T this) { | |
4060 | return this->siteA_pos; | |
4061 | } | |
4062 | ||
3812 | 4063 | int |
3813 | Substring_chimera_pos_D (T this) { | |
3814 | return this->chimera_pos_2; | |
3815 | } | |
3816 | ||
3817 | bool | |
3818 | Substring_chimera_knownp (T this) { | |
3819 | return this->chimera_knownp; | |
3820 | } | |
4064 | Substring_siteN_pos (T this) { | |
4065 | return this->siteN_pos; | |
4066 | } | |
4067 | ||
3821 | 4068 | |
3822 | 4069 | int |
3823 | 4070 | Substring_nchimera_known (T this) { |
3824 | 4071 | if (this == NULL) { |
3825 | 4072 | return 0; |
3826 | 4073 | } else { |
3827 | return (int) this->chimera_knownp + (int) this->chimera_knownp_2; | |
4074 | return (int) this->siteD_knownp + (int) this->siteA_knownp; | |
3828 | 4075 | } |
3829 | 4076 | } |
3830 | 4077 | |
3833 | 4080 | if (this == NULL) { |
3834 | 4081 | return 0; |
3835 | 4082 | } else { |
3836 | return (int) this->chimera_novelp + (int) this->chimera_novelp_2; | |
4083 | return (int) this->siteD_novelp + (int) this->siteA_novelp; | |
3837 | 4084 | } |
3838 | 4085 | } |
3839 | 4086 | |
3850 | 4097 | return this->ambiguous_p; |
3851 | 4098 | } |
3852 | 4099 | |
4100 | bool | |
4101 | Substring_list_ambiguous_p (List_T list) { | |
4102 | T this; | |
4103 | List_T p; | |
4104 | ||
4105 | for (p = list; p != NULL; p = List_next(p)) { | |
4106 | this = (T) List_head(p); | |
4107 | if (this->ambiguous_p == true) { | |
4108 | return true; | |
4109 | } | |
4110 | } | |
4111 | return false; | |
4112 | } | |
4113 | ||
3853 | 4114 | int |
3854 | 4115 | Substring_nambcoords (T this) { |
3855 | 4116 | return this->nambcoords; |
3874 | 4135 | Substring_amb_probs (T this) { |
3875 | 4136 | return this->amb_probs; |
3876 | 4137 | } |
3877 | ||
3878 | ||
3879 | 4138 | |
3880 | 4139 | |
3881 | 4140 | |
3979 | 4238 | |
3980 | 4239 | new->chimera_sensedir = old->chimera_sensedir; |
3981 | 4240 | |
3982 | new->splicecoord = old->splicecoord; | |
3983 | new->splicesites_knowni = old->splicesites_knowni; | |
3984 | new->chimera_knownp = old->chimera_knownp; | |
3985 | new->chimera_novelp = old->chimera_novelp; | |
3986 | new->chimera_modelpos = old->chimera_modelpos; | |
3987 | new->chimera_pos = old->chimera_pos; | |
3988 | new->chimera_prob = old->chimera_prob; | |
3989 | ||
3990 | new->splicecoord_2 = old->splicecoord_2; | |
3991 | new->splicesites_knowni_2 = old->splicesites_knowni_2; | |
3992 | new->chimera_knownp_2 = old->chimera_knownp_2; | |
3993 | new->chimera_novelp_2 = old->chimera_novelp_2; | |
3994 | new->chimera_modelpos_2 = old->chimera_modelpos_2; | |
3995 | new->chimera_pos_2 = old->chimera_pos_2; | |
3996 | new->chimera_prob_2 = old->chimera_prob_2; | |
4241 | new->splicecoord_D = old->splicecoord_D; | |
4242 | new->splicesitesD_knowni = old->splicesitesD_knowni; | |
4243 | new->siteD_knownp = old->siteD_knownp; | |
4244 | new->siteD_novelp = old->siteD_novelp; | |
4245 | new->siteD_pos = old->siteD_pos; | |
4246 | new->siteD_prob = old->siteD_prob; | |
4247 | ||
4248 | new->splicecoord_A = old->splicecoord_A; | |
4249 | new->splicesitesA_knowni = old->splicesitesA_knowni; | |
4250 | new->siteA_knownp = old->siteA_knownp; | |
4251 | new->siteA_novelp = old->siteA_novelp; | |
4252 | new->siteA_pos = old->siteA_pos; | |
4253 | new->siteA_prob = old->siteA_prob; | |
4254 | ||
4255 | new->splicecoord_N = old->splicecoord_N; | |
4256 | new->siteN_pos = old->siteN_pos; | |
3997 | 4257 | |
3998 | 4258 | new->ambiguous_p = old->ambiguous_p; |
3999 | 4259 | if (old->nambcoords == 0) { |
4002 | 4262 | new->amb_knowni = (int *) NULL; |
4003 | 4263 | new->amb_nmismatches = (int *) NULL; |
4004 | 4264 | new->amb_probs = (double *) NULL; |
4005 | new->amb_common_prob = 0.0; | |
4006 | new->amb_donor_common_p = false; | |
4007 | 4265 | } else { |
4008 | 4266 | new->nambcoords = old->nambcoords; |
4009 | 4267 | new->ambcoords = (Univcoord_T *) MALLOC_OUT(old->nambcoords * sizeof(Univcoord_T)); |
4010 | 4268 | new->amb_knowni = (int *) MALLOC_OUT(old->nambcoords * sizeof(int)); |
4011 | 4269 | new->amb_nmismatches = (int *) MALLOC_OUT(old->nambcoords * sizeof(int)); |
4012 | 4270 | new->amb_probs = (double *) MALLOC_OUT(old->nambcoords * sizeof(double)); |
4013 | new->amb_common_prob = old->amb_common_prob; | |
4014 | new->amb_donor_common_p = old->amb_donor_common_p; | |
4015 | 4271 | |
4016 | 4272 | memcpy(new->ambcoords,old->ambcoords,old->nambcoords * sizeof(Univcoord_T)); |
4017 | 4273 | memcpy(new->amb_knowni,old->amb_knowni,old->nambcoords * sizeof(int)); |
4018 | 4274 | memcpy(new->amb_nmismatches,old->amb_nmismatches,old->nambcoords * sizeof(int)); |
4019 | 4275 | memcpy(new->amb_probs,old->amb_probs,old->nambcoords * sizeof(double)); |
4020 | 4276 | } |
4277 | new->amb_type = old->amb_type; | |
4021 | 4278 | |
4022 | 4279 | return new; |
4023 | 4280 | } |
4083 | 4340 | |
4084 | 4341 | debug2(printf("Making new startfrag with coord %u and left %u, plusp %d, query %d..%d, genome %u..%u\n", |
4085 | 4342 | startfrag_coord,left,plusp,querystart,queryend,alignstart - chroffset,alignend - chroffset)); |
4086 | new->splicecoord = startfrag_coord; | |
4087 | new->splicesites_knowni = -1; | |
4088 | ||
4089 | new->chimera_modelpos = left + splice_pos; | |
4090 | assert(new->splicecoord == new->chimera_modelpos); | |
4343 | new->splicecoord_N = startfrag_coord; | |
4344 | assert(startfrag_coord == left + splice_pos); | |
4345 | ||
4091 | 4346 | new->chimera_sensedir = SENSE_NULL; |
4092 | /* new->chimera_knownp = false; */ | |
4093 | new->chimera_novelp = true; | |
4094 | 4347 | |
4095 | 4348 | if (plusp == true) { |
4096 | new->chimera_pos = splice_pos; | |
4097 | } else { | |
4098 | new->chimera_pos = querylength - splice_pos; | |
4099 | } | |
4100 | new->chimera_prob = 0.0; | |
4101 | ||
4102 | new->siteA_prob = 0.0; | |
4103 | new->siteD_prob = 0.0; | |
4349 | new->siteN_pos = splice_pos; | |
4350 | } else { | |
4351 | new->siteN_pos = querylength - splice_pos; | |
4352 | } | |
4104 | 4353 | |
4105 | 4354 | return new; |
4106 | 4355 | } |
4165 | 4414 | |
4166 | 4415 | debug2(printf("Making new endfrag with coord %u and left %u, plusp %d, query %d..%d, genome %u..%u\n", |
4167 | 4416 | endfrag_coord,left,plusp,querystart,queryend,alignstart - chroffset,alignend - chroffset)); |
4168 | new->splicecoord = endfrag_coord; | |
4169 | new->splicesites_knowni = -1; | |
4170 | ||
4171 | new->chimera_modelpos = left + splice_pos; | |
4172 | assert(new->splicecoord == new->chimera_modelpos); | |
4417 | new->splicecoord_N = endfrag_coord; | |
4418 | assert(endfrag_coord == left + splice_pos); | |
4419 | ||
4173 | 4420 | new->chimera_sensedir = SENSE_NULL; |
4174 | /* new->chimera_knownp = false; */ | |
4175 | new->chimera_novelp = true; | |
4176 | 4421 | |
4177 | 4422 | if (plusp == true) { |
4178 | new->chimera_pos = splice_pos; | |
4179 | } else { | |
4180 | new->chimera_pos = querylength - splice_pos; | |
4181 | } | |
4182 | new->chimera_prob = 0.0; | |
4183 | ||
4184 | new->siteA_prob = 0.0; | |
4185 | new->siteD_prob = 0.0; | |
4423 | new->siteN_pos = splice_pos; | |
4424 | } else { | |
4425 | new->siteN_pos = querylength - splice_pos; | |
4426 | } | |
4186 | 4427 | |
4187 | 4428 | return new; |
4188 | 4429 | } |
4219 | 4460 | |
4220 | 4461 | querystart = substring_querystart; /* 0, for an end piece */ |
4221 | 4462 | queryend = donor_pos; |
4463 | #if 0 | |
4222 | 4464 | if (querystart == 0) { |
4223 | 4465 | trim_left_action = COMPUTE_TRIM; /* querystart == 0 */ |
4224 | 4466 | } else { |
4225 | 4467 | trim_left_action = PRE_TRIMMED; |
4226 | 4468 | } |
4469 | #else | |
4470 | trim_left_action = COMPUTE_TRIM; | |
4471 | #endif | |
4227 | 4472 | trim_right_action = NO_TRIM; |
4228 | 4473 | |
4229 | 4474 | } else if (sensedir == SENSE_ANTI) { |
4233 | 4478 | querystart = donor_pos; |
4234 | 4479 | queryend = substring_queryend; /* querylength, for an end piece */ |
4235 | 4480 | trim_left_action = NO_TRIM; |
4481 | #if 0 | |
4236 | 4482 | if (queryend == querylength) { |
4237 | 4483 | trim_right_action = COMPUTE_TRIM; /* queryend == querylength */ |
4238 | 4484 | } else { |
4239 | 4485 | trim_right_action = PRE_TRIMMED; |
4240 | 4486 | } |
4487 | #else | |
4488 | trim_right_action = COMPUTE_TRIM; | |
4489 | #endif | |
4241 | 4490 | |
4242 | 4491 | } else { |
4243 | 4492 | abort(); |
4255 | 4504 | |
4256 | 4505 | querystart = substring_querystart; /* 0, for an end piece */ |
4257 | 4506 | queryend = querylength - donor_pos; |
4507 | #if 0 | |
4258 | 4508 | if (querystart == 0) { |
4259 | 4509 | trim_left_action = COMPUTE_TRIM; /* querystart == 0 */ |
4260 | 4510 | } else { |
4261 | 4511 | trim_left_action = PRE_TRIMMED; |
4262 | 4512 | } |
4513 | #else | |
4514 | trim_left_action = COMPUTE_TRIM; | |
4515 | #endif | |
4263 | 4516 | trim_right_action = NO_TRIM; |
4264 | 4517 | |
4265 | 4518 | } else if (sensedir == SENSE_ANTI) { |
4269 | 4522 | querystart = querylength - donor_pos; |
4270 | 4523 | queryend = substring_queryend; /* querylength, for an end piece */ |
4271 | 4524 | trim_left_action = NO_TRIM; |
4525 | #if 0 | |
4272 | 4526 | if (queryend == querylength) { |
4273 | 4527 | trim_right_action = COMPUTE_TRIM; /* queryend == querylength */ |
4274 | 4528 | } else { |
4275 | 4529 | trim_right_action = PRE_TRIMMED; |
4276 | 4530 | } |
4531 | #else | |
4532 | trim_right_action = COMPUTE_TRIM; | |
4533 | #endif | |
4277 | 4534 | |
4278 | 4535 | } else { |
4279 | 4536 | abort(); |
4291 | 4548 | return (T) NULL; |
4292 | 4549 | } |
4293 | 4550 | |
4294 | debug2(printf("Making new donor with splicesites_i %d, coord %u and left %u, plusp %d, sensedir %d, query %d..%d, genome %u..%u\n", | |
4295 | donor_knowni,donor_coord,left,plusp,sensedir,querystart,queryend,alignstart - chroffset,alignend - chroffset)); | |
4296 | new->splicecoord = donor_coord; | |
4297 | new->splicesites_knowni = donor_knowni; | |
4298 | ||
4299 | new->chimera_modelpos = left + donor_pos; | |
4300 | assert(new->splicecoord == new->chimera_modelpos); | |
4551 | debug2(printf("Making new donor with splicesites_i %d, coord %u and left %u, plusp %d, sensedir %d, query %d..%d, trim %d..%d, genome %u..%u\n", | |
4552 | donor_knowni,donor_coord,left,plusp,sensedir,new->querystart,new->queryend, | |
4553 | new->trim_left,new->trim_right,alignstart - chroffset,alignend - chroffset)); | |
4554 | debug2(printf("Original bounds were %d..%d\n",substring_querystart,substring_queryend)); | |
4555 | debug2(printf("Setting siteD_prob to be %f\n",donor_prob)); | |
4556 | ||
4557 | new->splicecoord_D = donor_coord; | |
4558 | new->splicesitesD_knowni = donor_knowni; | |
4559 | assert(donor_coord == left + donor_pos); | |
4560 | ||
4301 | 4561 | new->chimera_sensedir = sensedir; |
4302 | 4562 | if (donor_knowni >= 0) { |
4303 | new->chimera_knownp = true; | |
4563 | new->siteD_knownp = true; | |
4304 | 4564 | /* new->chimera_novelp = false */ |
4305 | 4565 | } else { |
4306 | /* new->chimera_knownp = false; */ | |
4307 | new->chimera_novelp = true; | |
4566 | /* new->siteD_knownp = false; */ | |
4567 | new->siteD_novelp = true; | |
4308 | 4568 | } |
4309 | 4569 | |
4310 | 4570 | if (plusp == true) { |
4311 | new->chimera_pos = donor_pos; | |
4312 | } else { | |
4313 | new->chimera_pos = querylength - donor_pos; | |
4314 | } | |
4315 | new->chimera_prob = donor_prob; | |
4316 | ||
4317 | new->siteA_prob = 0.0; | |
4571 | new->siteD_pos = donor_pos; | |
4572 | } else { | |
4573 | new->siteD_pos = querylength - donor_pos; | |
4574 | } | |
4318 | 4575 | new->siteD_prob = donor_prob; |
4319 | 4576 | |
4320 | 4577 | return new; |
4352 | 4609 | querystart = acceptor_pos; |
4353 | 4610 | queryend = substring_queryend; /* querylength, for an end piece */ |
4354 | 4611 | trim_left_action = NO_TRIM; |
4612 | #if 0 | |
4355 | 4613 | if (queryend == querylength) { |
4356 | 4614 | trim_right_action = COMPUTE_TRIM; /* queryend == querylength */ |
4357 | 4615 | } else { |
4358 | 4616 | trim_right_action = PRE_TRIMMED; |
4359 | 4617 | } |
4618 | #else | |
4619 | trim_right_action = COMPUTE_TRIM; | |
4620 | #endif | |
4360 | 4621 | |
4361 | 4622 | } else if (sensedir == SENSE_ANTI) { |
4362 | 4623 | start_endtype = END; |
4364 | 4625 | |
4365 | 4626 | querystart = substring_querystart; /* 0, for an end piece */ |
4366 | 4627 | queryend = acceptor_pos; |
4628 | #if 0 | |
4367 | 4629 | if (querystart == 0) { |
4368 | 4630 | trim_left_action = COMPUTE_TRIM; /* querystart == 0 */ |
4369 | 4631 | } else { |
4370 | 4632 | trim_left_action = PRE_TRIMMED; |
4371 | 4633 | } |
4634 | #else | |
4635 | trim_left_action = COMPUTE_TRIM; | |
4636 | #endif | |
4372 | 4637 | trim_right_action = NO_TRIM; |
4373 | 4638 | |
4374 | 4639 | } else { |
4388 | 4653 | querystart = querylength - acceptor_pos; |
4389 | 4654 | queryend = substring_queryend; /* querylength, for an end piece */ |
4390 | 4655 | trim_left_action = NO_TRIM; |
4656 | #if 0 | |
4391 | 4657 | if (queryend == querylength) { |
4392 | 4658 | trim_right_action = COMPUTE_TRIM; /* queryend == querylength */ |
4393 | 4659 | } else { |
4394 | 4660 | trim_right_action = PRE_TRIMMED; |
4395 | 4661 | } |
4662 | #else | |
4663 | trim_right_action = COMPUTE_TRIM; | |
4664 | #endif | |
4396 | 4665 | |
4397 | 4666 | } else if (sensedir == SENSE_ANTI) { |
4398 | 4667 | start_endtype = END; |
4400 | 4669 | |
4401 | 4670 | querystart = substring_querystart; /* 0, for an end piece */ |
4402 | 4671 | queryend = querylength - acceptor_pos; |
4672 | #if 0 | |
4403 | 4673 | if (querystart == 0) { |
4404 | 4674 | trim_left_action = COMPUTE_TRIM; /* querystart == 0 */ |
4405 | 4675 | } else { |
4406 | 4676 | trim_left_action = PRE_TRIMMED; |
4407 | 4677 | } |
4678 | #else | |
4679 | trim_left_action = COMPUTE_TRIM; | |
4680 | #endif | |
4408 | 4681 | trim_right_action = NO_TRIM; |
4409 | 4682 | |
4410 | 4683 | } else { |
4423 | 4696 | return (T) NULL; |
4424 | 4697 | } |
4425 | 4698 | |
4426 | debug2(printf("Making new acceptor with splicesites_i %d, coord %u and left %u, plusp %d, sensedir %d, query %d..%d, genome %u..%u\n", | |
4427 | acceptor_knowni,acceptor_coord,left,plusp,sensedir,querystart,queryend,alignstart - chroffset,alignend - chroffset)); | |
4699 | debug2(printf("Making new acceptor with splicesites_i %d, coord %u and left %u, plusp %d, sensedir %d, query %d..%d, trim %d..%d, genome %u..%u\n", | |
4700 | acceptor_knowni,acceptor_coord,left,plusp,sensedir,new->querystart,new->queryend, | |
4701 | new->trim_left,new->trim_right,alignstart - chroffset,alignend - chroffset)); | |
4428 | 4702 | debug2(printf("Original bounds were %d..%d\n",substring_querystart,substring_queryend)); |
4429 | ||
4430 | new->splicecoord = acceptor_coord; | |
4431 | new->splicesites_knowni = acceptor_knowni; | |
4432 | ||
4433 | new->chimera_modelpos = left + acceptor_pos; | |
4434 | assert(new->splicecoord == new->chimera_modelpos); | |
4703 | debug2(printf("Setting siteA_prob to be %f\n",acceptor_prob)); | |
4704 | ||
4705 | new->splicecoord_A = acceptor_coord; | |
4706 | new->splicesitesA_knowni = acceptor_knowni; | |
4707 | assert(acceptor_coord == left + acceptor_pos); | |
4708 | ||
4435 | 4709 | new->chimera_sensedir = sensedir; |
4436 | 4710 | if (acceptor_knowni >= 0) { |
4437 | new->chimera_knownp = true; | |
4711 | new->siteA_knownp = true; | |
4438 | 4712 | /* new->chimera_novelp = false */ |
4439 | 4713 | } else { |
4440 | 4714 | /* new->chimera_knownp = false; */ |
4441 | new->chimera_novelp = true; | |
4715 | new->siteA_novelp = true; | |
4442 | 4716 | } |
4443 | 4717 | |
4444 | 4718 | if (plusp == true) { |
4445 | new->chimera_pos = acceptor_pos; | |
4446 | } else { | |
4447 | new->chimera_pos = querylength - acceptor_pos; | |
4448 | } | |
4449 | new->chimera_prob = acceptor_prob; | |
4450 | ||
4719 | new->siteA_pos = acceptor_pos; | |
4720 | } else { | |
4721 | new->siteA_pos = querylength - acceptor_pos; | |
4722 | } | |
4451 | 4723 | new->siteA_prob = acceptor_prob; |
4452 | new->siteD_prob = 0.0; | |
4453 | 4724 | |
4454 | 4725 | return new; |
4455 | 4726 | } |
4525 | 4796 | } |
4526 | 4797 | |
4527 | 4798 | debug2(printf("Making new middle with left %u, plusp %d\n",left,plusp)); |
4528 | new->splicecoord = acceptor_coord; | |
4529 | new->splicesites_knowni = acceptor_knowni; | |
4530 | new->splicecoord_2 = donor_coord; | |
4531 | new->splicesites_knowni_2 = donor_knowni; | |
4532 | ||
4533 | new->chimera_modelpos = left + acceptor_pos; | |
4534 | new->chimera_modelpos_2 = left + donor_pos; | |
4799 | new->splicecoord_A = acceptor_coord; | |
4800 | new->splicesitesA_knowni = acceptor_knowni; | |
4801 | new->splicecoord_D = donor_coord; | |
4802 | new->splicesitesD_knowni = donor_knowni; | |
4803 | ||
4535 | 4804 | new->chimera_sensedir = sensedir; |
4536 | 4805 | |
4537 | 4806 | if (acceptor_knowni >= 0) { |
4538 | new->chimera_knownp = true; | |
4807 | new->siteA_knownp = true; | |
4539 | 4808 | /* new->chimera_novelp = false; */ |
4540 | 4809 | } else { |
4541 | 4810 | /* new->chimera_knownp = false; */ |
4542 | new->chimera_novelp = true; | |
4811 | new->siteA_novelp = true; | |
4543 | 4812 | } |
4544 | 4813 | |
4545 | 4814 | if (donor_knowni >= 0) { |
4546 | new->chimera_knownp_2 = true; | |
4815 | new->siteD_knownp = true; | |
4547 | 4816 | /* new->chimera_novelp_2 = false; */ |
4548 | 4817 | } else { |
4549 | /* new->chimera_knownp_2 = false; */ | |
4550 | new->chimera_novelp_2 = true; | |
4818 | /* new->siteD_knownp_2 = false; */ | |
4819 | new->siteD_novelp = true; | |
4551 | 4820 | } |
4552 | 4821 | |
4553 | 4822 | if (plusp == true) { |
4554 | new->chimera_pos = acceptor_pos; | |
4555 | new->chimera_pos_2 = donor_pos; | |
4556 | } else { | |
4557 | new->chimera_pos = querylength - acceptor_pos; | |
4558 | new->chimera_pos_2 = querylength - donor_pos; | |
4559 | } | |
4560 | ||
4561 | new->chimera_prob = acceptor_prob; | |
4562 | new->chimera_prob_2 = donor_prob; | |
4823 | new->siteA_pos = acceptor_pos; | |
4824 | new->siteD_pos = donor_pos; | |
4825 | } else { | |
4826 | new->siteA_pos = querylength - acceptor_pos; | |
4827 | new->siteD_pos = querylength - donor_pos; | |
4828 | } | |
4563 | 4829 | |
4564 | 4830 | new->siteA_prob = acceptor_prob; |
4565 | 4831 | new->siteD_prob = donor_prob; |
4575 | 4841 | if (donor == NULL) { |
4576 | 4842 | return; |
4577 | 4843 | |
4578 | } else if (donor->chimera_knownp == false) { | |
4844 | } else if (donor->siteD_knownp == false) { | |
4579 | 4845 | /* Prob already assigned */ |
4580 | 4846 | |
4581 | 4847 | } else if (donor->chimera_sensedir == SENSE_FORWARD) { |
4582 | 4848 | if (donor->plusp == true) { |
4583 | donor->chimera_prob = Maxent_hr_donor_prob(donor->splicecoord,donor->chroffset); | |
4584 | } else { | |
4585 | donor->chimera_prob = Maxent_hr_antidonor_prob(donor->splicecoord,donor->chroffset); | |
4849 | donor->siteD_prob = Maxent_hr_donor_prob(donor->splicecoord_D,donor->chroffset); | |
4850 | } else { | |
4851 | donor->siteD_prob = Maxent_hr_antidonor_prob(donor->splicecoord_D,donor->chroffset); | |
4586 | 4852 | } |
4587 | 4853 | |
4588 | 4854 | } else if (donor->chimera_sensedir == SENSE_ANTI) { |
4589 | 4855 | if (donor->plusp == true) { |
4590 | donor->chimera_prob = Maxent_hr_antidonor_prob(donor->splicecoord,donor->chroffset); | |
4591 | } else { | |
4592 | donor->chimera_prob = Maxent_hr_donor_prob(donor->splicecoord,donor->chroffset); | |
4856 | donor->siteD_prob = Maxent_hr_antidonor_prob(donor->splicecoord_D,donor->chroffset); | |
4857 | } else { | |
4858 | donor->siteD_prob = Maxent_hr_donor_prob(donor->splicecoord_D,donor->chroffset); | |
4593 | 4859 | } |
4594 | 4860 | |
4595 | 4861 | } else { |
4596 | 4862 | /* SENSE_NULL */ |
4597 | donor->chimera_prob = 0.0; | |
4863 | donor->siteD_prob = 0.0; | |
4598 | 4864 | } |
4599 | 4865 | |
4600 | 4866 | return; |
4606 | 4872 | if (acceptor == NULL) { |
4607 | 4873 | return; |
4608 | 4874 | |
4609 | } else if (acceptor->chimera_knownp == false) { | |
4875 | } else if (acceptor->siteA_knownp == false) { | |
4610 | 4876 | /* Prob already assigned */ |
4611 | 4877 | |
4612 | 4878 | } else if (acceptor->chimera_sensedir == SENSE_FORWARD) { |
4613 | 4879 | if (acceptor->plusp == true) { |
4614 | acceptor->chimera_prob = Maxent_hr_acceptor_prob(acceptor->splicecoord,acceptor->chroffset); | |
4615 | } else { | |
4616 | acceptor->chimera_prob = Maxent_hr_antiacceptor_prob(acceptor->splicecoord,acceptor->chroffset); | |
4880 | acceptor->siteA_prob = Maxent_hr_acceptor_prob(acceptor->splicecoord_A,acceptor->chroffset); | |
4881 | } else { | |
4882 | acceptor->siteA_prob = Maxent_hr_antiacceptor_prob(acceptor->splicecoord_A,acceptor->chroffset); | |
4617 | 4883 | } |
4618 | 4884 | |
4619 | 4885 | } else if (acceptor->chimera_sensedir == SENSE_ANTI) { |
4620 | 4886 | if (acceptor->plusp == true) { |
4621 | acceptor->chimera_prob = Maxent_hr_antiacceptor_prob(acceptor->splicecoord,acceptor->chroffset); | |
4622 | } else { | |
4623 | acceptor->chimera_prob = Maxent_hr_acceptor_prob(acceptor->splicecoord,acceptor->chroffset); | |
4887 | acceptor->siteA_prob = Maxent_hr_antiacceptor_prob(acceptor->splicecoord_A,acceptor->chroffset); | |
4888 | } else { | |
4889 | acceptor->siteA_prob = Maxent_hr_acceptor_prob(acceptor->splicecoord_A,acceptor->chroffset); | |
4624 | 4890 | } |
4625 | 4891 | |
4626 | 4892 | } else { |
4627 | 4893 | /* SENSE_NULL */ |
4628 | acceptor->chimera_prob = 0.0; | |
4894 | acceptor->siteA_prob = 0.0; | |
4629 | 4895 | } |
4630 | 4896 | |
4631 | 4897 | return; |
4635 | 4901 | void |
4636 | 4902 | Substring_assign_shortexon_prob (T shortexon) { |
4637 | 4903 | |
4638 | if (shortexon->chimera_knownp == false) { | |
4904 | if (shortexon->siteA_knownp == false) { | |
4639 | 4905 | /* Prob1 already assigned */ |
4640 | 4906 | |
4641 | 4907 | } else if (shortexon->chimera_sensedir == SENSE_FORWARD) { |
4642 | 4908 | if (shortexon->plusp == true) { |
4643 | shortexon->chimera_prob = Maxent_hr_acceptor_prob(shortexon->chimera_modelpos,shortexon->chroffset); | |
4644 | } else { | |
4645 | shortexon->chimera_prob = Maxent_hr_antiacceptor_prob(shortexon->chimera_modelpos,shortexon->chroffset); | |
4909 | shortexon->siteA_prob = Maxent_hr_acceptor_prob(shortexon->splicecoord_A,shortexon->chroffset); | |
4910 | } else { | |
4911 | shortexon->siteA_prob = Maxent_hr_antiacceptor_prob(shortexon->splicecoord_A,shortexon->chroffset); | |
4646 | 4912 | } |
4647 | 4913 | |
4648 | 4914 | } else if (shortexon->chimera_sensedir == SENSE_ANTI) { |
4649 | 4915 | if (shortexon->plusp == true) { |
4650 | shortexon->chimera_prob = Maxent_hr_antiacceptor_prob(shortexon->chimera_modelpos,shortexon->chroffset); | |
4651 | } else { | |
4652 | shortexon->chimera_prob = Maxent_hr_acceptor_prob(shortexon->chimera_modelpos,shortexon->chroffset); | |
4916 | shortexon->siteA_prob = Maxent_hr_antiacceptor_prob(shortexon->splicecoord_A,shortexon->chroffset); | |
4917 | } else { | |
4918 | shortexon->siteA_prob = Maxent_hr_acceptor_prob(shortexon->splicecoord_A,shortexon->chroffset); | |
4653 | 4919 | } |
4654 | 4920 | |
4655 | 4921 | } else { |
4656 | 4922 | abort(); |
4657 | 4923 | } |
4658 | 4924 | |
4659 | if (shortexon->chimera_knownp_2 == false) { | |
4925 | if (shortexon->siteD_knownp == false) { | |
4660 | 4926 | /* Prob2 already assigned */ |
4661 | 4927 | |
4662 | 4928 | } else if (shortexon->chimera_sensedir == SENSE_FORWARD) { |
4663 | 4929 | if (shortexon->plusp == true) { |
4664 | shortexon->chimera_prob_2 = Maxent_hr_donor_prob(shortexon->chimera_modelpos_2,shortexon->chroffset); | |
4665 | } else { | |
4666 | shortexon->chimera_prob_2 = Maxent_hr_antidonor_prob(shortexon->chimera_modelpos_2,shortexon->chroffset); | |
4930 | shortexon->siteD_prob = Maxent_hr_donor_prob(shortexon->splicecoord_D,shortexon->chroffset); | |
4931 | } else { | |
4932 | shortexon->siteD_prob = Maxent_hr_antidonor_prob(shortexon->splicecoord_D,shortexon->chroffset); | |
4667 | 4933 | } |
4668 | 4934 | |
4669 | 4935 | } else if (shortexon->chimera_sensedir == SENSE_ANTI) { |
4670 | 4936 | if (shortexon->plusp == true) { |
4671 | shortexon->chimera_prob_2 = Maxent_hr_antidonor_prob(shortexon->chimera_modelpos_2,shortexon->chroffset); | |
4672 | } else { | |
4673 | shortexon->chimera_prob_2 = Maxent_hr_donor_prob(shortexon->chimera_modelpos_2,shortexon->chroffset); | |
4937 | shortexon->siteD_prob = Maxent_hr_antidonor_prob(shortexon->splicecoord_D,shortexon->chroffset); | |
4938 | } else { | |
4939 | shortexon->siteD_prob = Maxent_hr_donor_prob(shortexon->splicecoord_D,shortexon->chroffset); | |
4674 | 4940 | } |
4675 | 4941 | |
4676 | 4942 | } else { |
4683 | 4949 | |
4684 | 4950 | |
4685 | 4951 | static int |
4686 | ascending_pos_cmp (const void *a, const void *b) { | |
4952 | ascending_siteD_pos_cmp (const void *a, const void *b) { | |
4687 | 4953 | T x = * (T *) a; |
4688 | 4954 | T y = * (T *) b; |
4689 | 4955 | |
4690 | if (x->chimera_pos < y->chimera_pos) { | |
4956 | if (x->siteD_pos < y->siteD_pos) { | |
4691 | 4957 | return -1; |
4692 | } else if (x->chimera_pos > y->chimera_pos) { | |
4958 | } else if (x->siteD_pos > y->siteD_pos) { | |
4693 | 4959 | return +1; |
4694 | 4960 | } else if (x->genomicstart < y->genomicstart) { |
4695 | 4961 | return -1; |
4696 | 4962 | } else if (x->genomicstart > y->genomicstart) { |
4697 | 4963 | return +1; |
4698 | } else if (x->chimera_knownp == true && y->chimera_knownp == false) { | |
4964 | } else if (x->siteD_knownp == true && y->siteD_knownp == false) { | |
4699 | 4965 | return -1; |
4700 | } else if (y->chimera_knownp == true && x->chimera_knownp == false) { | |
4966 | } else if (y->siteD_knownp == true && x->siteD_knownp == false) { | |
4701 | 4967 | return +1; |
4702 | 4968 | } else { |
4703 | 4969 | return 0; |
4705 | 4971 | } |
4706 | 4972 | |
4707 | 4973 | static int |
4708 | descending_pos_cmp (const void *a, const void *b) { | |
4974 | ascending_siteA_pos_cmp (const void *a, const void *b) { | |
4709 | 4975 | T x = * (T *) a; |
4710 | 4976 | T y = * (T *) b; |
4711 | 4977 | |
4712 | if (x->chimera_pos < y->chimera_pos) { | |
4978 | if (x->siteA_pos < y->siteA_pos) { | |
4713 | 4979 | return -1; |
4714 | } else if (x->chimera_pos > y->chimera_pos) { | |
4980 | } else if (x->siteA_pos > y->siteA_pos) { | |
4981 | return +1; | |
4982 | } else if (x->genomicstart < y->genomicstart) { | |
4983 | return -1; | |
4984 | } else if (x->genomicstart > y->genomicstart) { | |
4985 | return +1; | |
4986 | } else if (x->siteA_knownp == true && y->siteA_knownp == false) { | |
4987 | return -1; | |
4988 | } else if (y->siteA_knownp == true && x->siteA_knownp == false) { | |
4989 | return +1; | |
4990 | } else { | |
4991 | return 0; | |
4992 | } | |
4993 | } | |
4994 | ||
4995 | static int | |
4996 | ascending_siteN_pos_cmp (const void *a, const void *b) { | |
4997 | T x = * (T *) a; | |
4998 | T y = * (T *) b; | |
4999 | ||
5000 | if (x->siteN_pos < y->siteN_pos) { | |
5001 | return -1; | |
5002 | } else if (x->siteN_pos > y->siteN_pos) { | |
5003 | return +1; | |
5004 | } else if (x->genomicstart < y->genomicstart) { | |
5005 | return -1; | |
5006 | } else if (x->genomicstart > y->genomicstart) { | |
5007 | return +1; | |
5008 | } else { | |
5009 | return 0; | |
5010 | } | |
5011 | } | |
5012 | ||
5013 | static int | |
5014 | descending_siteD_pos_cmp (const void *a, const void *b) { | |
5015 | T x = * (T *) a; | |
5016 | T y = * (T *) b; | |
5017 | ||
5018 | if (x->siteD_pos < y->siteD_pos) { | |
5019 | return -1; | |
5020 | } else if (x->siteD_pos > y->siteD_pos) { | |
4715 | 5021 | return +1; |
4716 | 5022 | } else if (x->genomicstart > y->genomicstart) { |
4717 | 5023 | return -1; |
4718 | 5024 | } else if (x->genomicstart < y->genomicstart) { |
4719 | 5025 | return +1; |
4720 | } else if (x->chimera_knownp == true && y->chimera_knownp == false) { | |
5026 | } else if (x->siteD_knownp == true && y->siteD_knownp == false) { | |
4721 | 5027 | return -1; |
4722 | } else if (y->chimera_knownp == true && x->chimera_knownp == false) { | |
5028 | } else if (y->siteD_knownp == true && x->siteD_knownp == false) { | |
4723 | 5029 | return +1; |
4724 | 5030 | } else { |
4725 | 5031 | return 0; |
4726 | 5032 | } |
4727 | 5033 | } |
4728 | 5034 | |
5035 | static int | |
5036 | descending_siteA_pos_cmp (const void *a, const void *b) { | |
5037 | T x = * (T *) a; | |
5038 | T y = * (T *) b; | |
5039 | ||
5040 | if (x->siteA_pos < y->siteA_pos) { | |
5041 | return -1; | |
5042 | } else if (x->siteA_pos > y->siteA_pos) { | |
5043 | return +1; | |
5044 | } else if (x->genomicstart > y->genomicstart) { | |
5045 | return -1; | |
5046 | } else if (x->genomicstart < y->genomicstart) { | |
5047 | return +1; | |
5048 | } else if (x->siteA_knownp == true && y->siteA_knownp == false) { | |
5049 | return -1; | |
5050 | } else if (y->siteA_knownp == true && x->siteA_knownp == false) { | |
5051 | return +1; | |
5052 | } else { | |
5053 | return 0; | |
5054 | } | |
5055 | } | |
5056 | ||
5057 | static int | |
5058 | descending_siteN_pos_cmp (const void *a, const void *b) { | |
5059 | T x = * (T *) a; | |
5060 | T y = * (T *) b; | |
5061 | ||
5062 | if (x->siteN_pos < y->siteN_pos) { | |
5063 | return -1; | |
5064 | } else if (x->siteN_pos > y->siteN_pos) { | |
5065 | return +1; | |
5066 | } else if (x->genomicstart > y->genomicstart) { | |
5067 | return -1; | |
5068 | } else if (x->genomicstart < y->genomicstart) { | |
5069 | return +1; | |
5070 | } else { | |
5071 | return 0; | |
5072 | } | |
5073 | } | |
5074 | ||
4729 | 5075 | List_T |
4730 | Substring_sort_chimera_halves (List_T hitlist, bool ascendingp) { | |
5076 | Substring_sort_siteD_halves (List_T hitlist, bool ascendingp) { | |
4731 | 5077 | List_T sorted = NULL; |
4732 | 5078 | T x, *hits; |
4733 | 5079 | int n, i, j; |
4744 | 5090 | List_fill_array_and_free((void **) hits,&hitlist); |
4745 | 5091 | |
4746 | 5092 | if (ascendingp == true) { |
4747 | qsort(hits,n,sizeof(T),ascending_pos_cmp); | |
4748 | } else { | |
4749 | qsort(hits,n,sizeof(T),descending_pos_cmp); | |
5093 | qsort(hits,n,sizeof(T),ascending_siteD_pos_cmp); | |
5094 | } else { | |
5095 | qsort(hits,n,sizeof(T),descending_siteD_pos_cmp); | |
4750 | 5096 | } |
4751 | 5097 | |
4752 | 5098 | /* Check for duplicates */ |
4754 | 5100 | for (i = 0; i < n; i++) { |
4755 | 5101 | x = hits[i]; |
4756 | 5102 | j = i+1; |
4757 | while (j < n && hits[j]->chimera_pos == x->chimera_pos && hits[j]->genomicstart == x->genomicstart) { | |
5103 | while (j < n && hits[j]->siteD_pos == x->siteD_pos && hits[j]->genomicstart == x->genomicstart) { | |
5104 | eliminate[j] = true; | |
5105 | j++; | |
5106 | } | |
5107 | } | |
5108 | ||
5109 | debug(j = 0); | |
5110 | for (i = n-1; i >= 0; i--) { | |
5111 | x = hits[i]; | |
5112 | if (eliminate[i] == false) { | |
5113 | sorted = List_push(sorted,x); | |
5114 | } else { | |
5115 | Substring_free(&x); | |
5116 | debug(j++); | |
5117 | } | |
5118 | } | |
5119 | debug(printf("%d eliminated\n",j)); | |
5120 | ||
5121 | FREEA(hits); | |
5122 | FREEA(eliminate); | |
5123 | ||
5124 | return sorted; | |
5125 | } | |
5126 | ||
5127 | List_T | |
5128 | Substring_sort_siteA_halves (List_T hitlist, bool ascendingp) { | |
5129 | List_T sorted = NULL; | |
5130 | T x, *hits; | |
5131 | int n, i, j; | |
5132 | bool *eliminate; | |
5133 | ||
5134 | n = List_length(hitlist); | |
5135 | debug(printf("Checking %d spliceends for duplicates...",n)); | |
5136 | if (n == 0) { | |
5137 | debug(printf("\n")); | |
5138 | return NULL; | |
5139 | } | |
5140 | ||
5141 | hits = (T *) MALLOCA(n * sizeof(T)); | |
5142 | List_fill_array_and_free((void **) hits,&hitlist); | |
5143 | ||
5144 | if (ascendingp == true) { | |
5145 | qsort(hits,n,sizeof(T),ascending_siteA_pos_cmp); | |
5146 | } else { | |
5147 | qsort(hits,n,sizeof(T),descending_siteA_pos_cmp); | |
5148 | } | |
5149 | ||
5150 | /* Check for duplicates */ | |
5151 | eliminate = (bool *) CALLOCA(n,sizeof(bool)); | |
5152 | for (i = 0; i < n; i++) { | |
5153 | x = hits[i]; | |
5154 | j = i+1; | |
5155 | while (j < n && hits[j]->siteA_pos == x->siteA_pos && hits[j]->genomicstart == x->genomicstart) { | |
5156 | eliminate[j] = true; | |
5157 | j++; | |
5158 | } | |
5159 | } | |
5160 | ||
5161 | debug(j = 0); | |
5162 | for (i = n-1; i >= 0; i--) { | |
5163 | x = hits[i]; | |
5164 | if (eliminate[i] == false) { | |
5165 | sorted = List_push(sorted,x); | |
5166 | } else { | |
5167 | Substring_free(&x); | |
5168 | debug(j++); | |
5169 | } | |
5170 | } | |
5171 | debug(printf("%d eliminated\n",j)); | |
5172 | ||
5173 | FREEA(hits); | |
5174 | FREEA(eliminate); | |
5175 | ||
5176 | return sorted; | |
5177 | } | |
5178 | ||
5179 | List_T | |
5180 | Substring_sort_siteN_halves (List_T hitlist, bool ascendingp) { | |
5181 | List_T sorted = NULL; | |
5182 | T x, *hits; | |
5183 | int n, i, j; | |
5184 | bool *eliminate; | |
5185 | ||
5186 | n = List_length(hitlist); | |
5187 | debug(printf("Checking %d spliceends for duplicates...",n)); | |
5188 | if (n == 0) { | |
5189 | debug(printf("\n")); | |
5190 | return NULL; | |
5191 | } | |
5192 | ||
5193 | hits = (T *) MALLOCA(n * sizeof(T)); | |
5194 | List_fill_array_and_free((void **) hits,&hitlist); | |
5195 | ||
5196 | if (ascendingp == true) { | |
5197 | qsort(hits,n,sizeof(T),ascending_siteN_pos_cmp); | |
5198 | } else { | |
5199 | qsort(hits,n,sizeof(T),descending_siteN_pos_cmp); | |
5200 | } | |
5201 | ||
5202 | /* Check for duplicates */ | |
5203 | eliminate = (bool *) CALLOCA(n,sizeof(bool)); | |
5204 | for (i = 0; i < n; i++) { | |
5205 | x = hits[i]; | |
5206 | j = i+1; | |
5207 | while (j < n && hits[j]->siteN_pos == x->siteN_pos && hits[j]->genomicstart == x->genomicstart) { | |
4758 | 5208 | eliminate[j] = true; |
4759 | 5209 | j++; |
4760 | 5210 | } |
4945 | 5395 | } |
4946 | 5396 | |
4947 | 5397 | /* Note: this->chimera_knownp might not be set for GMAP alignments */ |
4948 | if (this->chimera_knownp == true) { | |
5398 | if (this->siteD_knownp == true) { | |
4949 | 5399 | /* Note: IIT_get_typed_signed_with_divno does not work here */ |
4950 | 5400 | splicesites = IIT_get_exact_multiple_with_divno(&nsplicesites,splicesites_iit, |
4951 | 5401 | splicesites_divint_crosstable[this->chrnum], |
5327 | 5777 | /* Handle result of substring_trim_novel_spliceends */ |
5328 | 5778 | if (invertp == false) { |
5329 | 5779 | if (substring->start_endtype == DON) { |
5330 | FPRINTF(fp,"donor:%.2f",substring->trim_left,substring->chimera_prob); | |
5780 | FPRINTF(fp,"donor:%.2f",substring->siteD_prob); | |
5331 | 5781 | } else if (substring->start_endtype == ACC) { |
5332 | FPRINTF(fp,"acceptor:%.2f",substring->trim_left,substring->chimera_prob); | |
5782 | FPRINTF(fp,"acceptor:%.2f",substring->siteA_prob); | |
5333 | 5783 | } else { |
5334 | 5784 | FPRINTF(fp,"start:%d",substring->trim_left); |
5335 | 5785 | } |
5336 | 5786 | } else { |
5337 | 5787 | if (substring->end_endtype == DON) { |
5338 | FPRINTF(fp,"donor:%.2f",substring->trim_right,substring->chimera_prob_2); | |
5788 | FPRINTF(fp,"donor:%.2f",substring->siteD_prob); | |
5339 | 5789 | } else if (substring->end_endtype == ACC) { |
5340 | FPRINTF(fp,"acceptor:%.2f",substring->trim_right,substring->chimera_prob_2); | |
5790 | FPRINTF(fp,"acceptor:%.2f",substring->siteA_prob); | |
5341 | 5791 | } else { |
5342 | 5792 | FPRINTF(fp,"start:%d",substring->trim_right); |
5343 | 5793 | } |
5373 | 5823 | /* Handle result of substring_trim_novel_spliceends */ |
5374 | 5824 | if (invertp == false) { |
5375 | 5825 | if (substring->end_endtype == DON) { |
5376 | FPRINTF(fp,"donor:%.2f",substring->trim_right,substring->chimera_prob_2); | |
5826 | FPRINTF(fp,"donor:%.2f",substring->siteD_prob); | |
5377 | 5827 | } else if (substring->end_endtype == ACC) { |
5378 | FPRINTF(fp,"acceptor:%.2f",substring->trim_right,substring->chimera_prob_2); | |
5828 | FPRINTF(fp,"acceptor:%.2f",substring->siteA_prob); | |
5379 | 5829 | } else { |
5380 | 5830 | FPRINTF(fp,"end:%d",substring->trim_right); |
5381 | 5831 | } |
5382 | 5832 | } else { |
5383 | 5833 | if (substring->start_endtype == DON) { |
5384 | FPRINTF(fp,"donor:%.2f",substring->trim_left,substring->chimera_prob); | |
5834 | FPRINTF(fp,"donor:%.2f",substring->siteD_prob); | |
5385 | 5835 | } else if (substring->start_endtype == ACC) { |
5386 | FPRINTF(fp,"acceptor:%.2f",substring->trim_left,substring->chimera_prob); | |
5836 | FPRINTF(fp,"acceptor:%.2f",substring->siteA_prob); | |
5387 | 5837 | } else { |
5388 | 5838 | FPRINTF(fp,"end:%d",substring->trim_left); |
5389 | 5839 | } |
5809 | 6259 | FPRINTF(fp,"\t"); |
5810 | 6260 | if (sensedir == SENSE_FORWARD) { |
5811 | 6261 | if (invertp == false) { |
5812 | FPRINTF(fp,"start:%d..donor:%.2f",donor->trim_left,donor->chimera_prob); | |
6262 | FPRINTF(fp,"start:%d..donor:%.2f",donor->trim_left,donor->siteD_prob); | |
5813 | 6263 | label_tag = "label_2"; |
5814 | 6264 | splice_dist_tag = "splice_dist_2"; |
5815 | 6265 | } else { |
5816 | FPRINTF(fp,"donor:%.2f..end:%d",donor->chimera_prob,donor->trim_left); | |
6266 | FPRINTF(fp,"donor:%.2f..end:%d",donor->siteD_prob,donor->trim_left); | |
5817 | 6267 | label_tag = "label_1"; |
5818 | 6268 | splice_dist_tag = "splice_dist_1"; |
5819 | 6269 | } |
5820 | 6270 | } else if (sensedir == SENSE_ANTI) { |
5821 | 6271 | if (invertp == false) { |
5822 | FPRINTF(fp,"donor:%.2f..end:%d",donor->chimera_prob,donor->trim_right); | |
6272 | FPRINTF(fp,"donor:%.2f..end:%d",donor->siteD_prob,donor->trim_right); | |
5823 | 6273 | label_tag = "label_1"; |
5824 | 6274 | splice_dist_tag = "splice_dist_1"; |
5825 | 6275 | } else { |
5826 | FPRINTF(fp,"start:%d..donor:%.2f",donor->trim_right,donor->chimera_prob); | |
6276 | FPRINTF(fp,"start:%d..donor:%.2f",donor->trim_right,donor->siteD_prob); | |
5827 | 6277 | label_tag = "label_2"; |
5828 | 6278 | splice_dist_tag = "splice_dist_2"; |
5829 | 6279 | } |
5830 | 6280 | } else { |
5831 | 6281 | /* SENSE_NULL */ |
5832 | 6282 | if (invertp == false) { |
5833 | FPRINTF(fp,"start:%d..splice:%.2f",donor->trim_left,donor->chimera_prob); | |
6283 | FPRINTF(fp,"start:%d..splice:%.2f",donor->trim_left,donor->siteD_prob); | |
5834 | 6284 | label_tag = "label_2"; |
5835 | 6285 | splice_dist_tag = "splice_dist_2"; |
5836 | 6286 | } else { |
5837 | FPRINTF(fp,"splice:%.2f..end:%d",donor->chimera_prob,donor->trim_left); | |
6287 | FPRINTF(fp,"splice:%.2f..end:%d",donor->siteD_prob,donor->trim_left); | |
5838 | 6288 | label_tag = "label_1"; |
5839 | 6289 | splice_dist_tag = "splice_dist_1"; |
5840 | 6290 | } |
5870 | 6320 | } |
5871 | 6321 | |
5872 | 6322 | #ifdef CHECK_KNOWNI |
5873 | if (donor->chimera_knownp == false && splicesites_iit) { | |
6323 | if (donor->siteD_knownp == false && splicesites_iit) { | |
5874 | 6324 | if (donor->plusp == true) { |
5875 | 6325 | splicesitepos = donor->genomicstart - donor->chroffset + donor->chimera_pos; |
5876 | 6326 | } else { |
5883 | 6333 | } |
5884 | 6334 | #endif |
5885 | 6335 | |
5886 | if (donor->chimera_knownp && splicesites_iit) { | |
5887 | print_splicesite_labels(fp,donor,donor_typeint,donor->chimera_pos,label_tag); | |
6336 | if (donor->siteD_knownp && splicesites_iit) { | |
6337 | print_splicesite_labels(fp,donor,donor_typeint,donor->siteD_pos,label_tag); | |
5888 | 6338 | } |
5889 | 6339 | |
5890 | 6340 | if (allocp == true) { |
5916 | 6366 | FPRINTF(fp,"\t"); |
5917 | 6367 | if (sensedir == SENSE_FORWARD) { |
5918 | 6368 | if (invertp == false) { |
5919 | FPRINTF(fp,"acceptor:%.2f..end:%d",acceptor->chimera_prob,acceptor->trim_right); | |
6369 | FPRINTF(fp,"acceptor:%.2f..end:%d",acceptor->siteA_prob,acceptor->trim_right); | |
5920 | 6370 | label_tag = "label_1"; |
5921 | 6371 | splice_dist_tag = "splice_dist_1"; |
5922 | 6372 | } else { |
5923 | FPRINTF(fp,"start:%d..acceptor:%.2f",acceptor->trim_right,acceptor->chimera_prob); | |
6373 | FPRINTF(fp,"start:%d..acceptor:%.2f",acceptor->trim_right,acceptor->siteA_prob); | |
5924 | 6374 | label_tag = "label_2"; |
5925 | 6375 | splice_dist_tag = "splice_dist_2"; |
5926 | 6376 | } |
5927 | 6377 | } else if (sensedir == SENSE_ANTI) { |
5928 | 6378 | if (invertp == false) { |
5929 | FPRINTF(fp,"start:%d..acceptor:%.2f",acceptor->trim_left,acceptor->chimera_prob); | |
6379 | FPRINTF(fp,"start:%d..acceptor:%.2f",acceptor->trim_left,acceptor->siteA_prob); | |
5930 | 6380 | label_tag = "label_2"; |
5931 | 6381 | splice_dist_tag = "splice_dist_2"; |
5932 | 6382 | } else { |
5933 | FPRINTF(fp,"acceptor:%.2f..end:%d",acceptor->chimera_prob,acceptor->trim_left); | |
6383 | FPRINTF(fp,"acceptor:%.2f..end:%d",acceptor->siteA_prob,acceptor->trim_left); | |
5934 | 6384 | label_tag = "label_1"; |
5935 | 6385 | splice_dist_tag = "splice_dist_1"; |
5936 | 6386 | } |
5937 | 6387 | } else { |
5938 | 6388 | /* SENSE_NULL */ |
5939 | 6389 | if (invertp == false) { |
5940 | FPRINTF(fp,"splice:%.2f..end:%d",acceptor->chimera_prob,acceptor->trim_right); | |
6390 | FPRINTF(fp,"splice:%.2f..end:%d",acceptor->siteA_prob,acceptor->trim_right); | |
5941 | 6391 | label_tag = "label_1"; |
5942 | 6392 | splice_dist_tag = "splice_dist_1"; |
5943 | 6393 | } else { |
5944 | FPRINTF(fp,"start:%d..splice:%.2f",acceptor->trim_right,acceptor->chimera_prob); | |
6394 | FPRINTF(fp,"start:%d..splice:%.2f",acceptor->trim_right,acceptor->siteA_prob); | |
5945 | 6395 | label_tag = "label_2"; |
5946 | 6396 | splice_dist_tag = "splice_dist_2"; |
5947 | 6397 | } |
5979 | 6429 | #ifdef CHECK_KNOWNI |
5980 | 6430 | if (acceptor->chimera_knownp == false && splicesites_iit) { |
5981 | 6431 | if (acceptor->plusp == true) { |
5982 | splicesitepos = acceptor->genomicstart - acceptor->chroffset + acceptor->chimera_pos; | |
5983 | } else { | |
5984 | splicesitepos = acceptor->genomicstart - acceptor->chroffset - acceptor->chimera_pos; | |
6432 | splicesitepos = acceptor->genomicstart - acceptor->chroffset + acceptor->siteA_pos; | |
6433 | } else { | |
6434 | splicesitepos = acceptor->genomicstart - acceptor->chroffset - acceptor->siteA_pos; | |
5985 | 6435 | } |
5986 | 6436 | splicesites = IIT_get_exact_multiple_with_divno(&nsplicesites,splicesites_iit, |
5987 | 6437 | splicesites_divint_crosstable[acceptor->chrnum], |
5991 | 6441 | #endif |
5992 | 6442 | |
5993 | 6443 | |
5994 | if (acceptor->chimera_knownp && splicesites_iit) { | |
5995 | print_splicesite_labels(fp,acceptor,acceptor_typeint,acceptor->chimera_pos,label_tag); | |
6444 | if (acceptor->siteA_knownp && splicesites_iit) { | |
6445 | print_splicesite_labels(fp,acceptor,acceptor_typeint,acceptor->siteA_pos,label_tag); | |
5996 | 6446 | } |
5997 | 6447 | |
5998 | 6448 | |
6051 | 6501 | |
6052 | 6502 | FPRINTF(fp,"\t"); |
6053 | 6503 | if (sensedir == SENSE_FORWARD && invertp == false) { |
6054 | FPRINTF(fp,"acceptor:%.2f..donor:%.2f",shortexon->chimera_prob,shortexon->chimera_prob_2); | |
6504 | FPRINTF(fp,"acceptor:%.2f..donor:%.2f",shortexon->siteA_prob,shortexon->siteD_prob); | |
6055 | 6505 | } else if (sensedir == SENSE_FORWARD && invertp == true) { |
6056 | FPRINTF(fp,"donor:%.2f..acceptor:%.2f",shortexon->chimera_prob_2,shortexon->chimera_prob); | |
6506 | FPRINTF(fp,"donor:%.2f..acceptor:%.2f",shortexon->siteD_prob,shortexon->siteA_prob); | |
6057 | 6507 | } else if (sensedir == SENSE_ANTI && invertp == false) { |
6058 | FPRINTF(fp,"donor:%.2f..acceptor:%.2f",shortexon->chimera_prob_2,shortexon->chimera_prob); | |
6508 | FPRINTF(fp,"donor:%.2f..acceptor:%.2f",shortexon->siteD_prob,shortexon->siteA_prob); | |
6059 | 6509 | } else if (sensedir == SENSE_ANTI && invertp == true) { |
6060 | FPRINTF(fp,"acceptor:%.2f..donor:%.2f",shortexon->chimera_prob,shortexon->chimera_prob_2); | |
6510 | FPRINTF(fp,"acceptor:%.2f..donor:%.2f",shortexon->siteA_prob,shortexon->siteD_prob); | |
6061 | 6511 | } |
6062 | 6512 | |
6063 | 6513 | FPRINTF(fp,",matches:%d,sub:%d",shortexon->nmatches,shortexon->nmismatches_bothdiff); |
6072 | 6522 | FPRINTF(fp,",dir:sense"); |
6073 | 6523 | print_shortexon_splice_distances(fp,distance1,distance2); |
6074 | 6524 | |
6075 | if (shortexon->chimera_knownp && splicesites_iit) { | |
6525 | if (shortexon->siteA_knownp && splicesites_iit) { | |
6076 | 6526 | print_splicesite_labels(fp,shortexon,acceptor_typeint, |
6077 | shortexon->chimera_pos,/*tag*/"label_1"); | |
6078 | } | |
6079 | if (shortexon->chimera_knownp_2 && splicesites_iit) { | |
6527 | shortexon->siteA_pos,/*tag*/"label_1"); | |
6528 | } | |
6529 | if (shortexon->siteD_knownp && splicesites_iit) { | |
6080 | 6530 | print_splicesite_labels(fp,shortexon,donor_typeint, |
6081 | shortexon->chimera_pos_2,/*tag*/"label_2"); | |
6531 | shortexon->siteD_pos,/*tag*/"label_2"); | |
6082 | 6532 | } |
6083 | 6533 | |
6084 | 6534 | } else if (sensedir == SENSE_FORWARD && invertp == true) { |
6085 | 6535 | FPRINTF(fp,",dir:antisense"); |
6086 | 6536 | print_shortexon_splice_distances(fp,distance1,distance2); |
6087 | 6537 | |
6088 | if (shortexon->chimera_knownp_2 && splicesites_iit) { | |
6538 | if (shortexon->siteD_knownp && splicesites_iit) { | |
6089 | 6539 | print_splicesite_labels(fp,shortexon,donor_typeint, |
6090 | shortexon->chimera_pos_2,/*tag*/"label_1"); | |
6091 | } | |
6092 | if (shortexon->chimera_knownp && splicesites_iit) { | |
6540 | shortexon->siteD_pos,/*tag*/"label_1"); | |
6541 | } | |
6542 | if (shortexon->siteA_knownp && splicesites_iit) { | |
6093 | 6543 | print_splicesite_labels(fp,shortexon,acceptor_typeint, |
6094 | shortexon->chimera_pos,/*tag*/"label_2"); | |
6544 | shortexon->siteA_pos,/*tag*/"label_2"); | |
6095 | 6545 | } |
6096 | 6546 | |
6097 | 6547 | } else if (sensedir == SENSE_ANTI && invertp == false) { |
6100 | 6550 | |
6101 | 6551 | |
6102 | 6552 | |
6103 | if (shortexon->chimera_knownp_2 && splicesites_iit) { | |
6553 | if (shortexon->siteD_knownp && splicesites_iit) { | |
6104 | 6554 | print_splicesite_labels(fp,shortexon,donor_typeint, |
6105 | shortexon->chimera_pos_2,/*tag*/"label_1"); | |
6106 | } | |
6107 | ||
6108 | if (shortexon->chimera_knownp && splicesites_iit) { | |
6555 | shortexon->siteD_pos,/*tag*/"label_1"); | |
6556 | } | |
6557 | ||
6558 | if (shortexon->siteA_knownp && splicesites_iit) { | |
6109 | 6559 | print_splicesite_labels(fp,shortexon,acceptor_typeint, |
6110 | shortexon->chimera_pos,/*tag*/"label_2"); | |
6560 | shortexon->siteA_pos,/*tag*/"label_2"); | |
6111 | 6561 | } |
6112 | 6562 | |
6113 | 6563 | } else if (sensedir == SENSE_ANTI && invertp == true) { |
6114 | 6564 | FPRINTF(fp,",dir:sense"); |
6115 | 6565 | print_shortexon_splice_distances(fp,distance1,distance2); |
6116 | if (shortexon->chimera_knownp && splicesites_iit) { | |
6566 | if (shortexon->siteA_knownp && splicesites_iit) { | |
6117 | 6567 | print_splicesite_labels(fp,shortexon,acceptor_typeint, |
6118 | shortexon->chimera_pos,/*tag*/"label_1"); | |
6119 | } | |
6120 | if (shortexon->chimera_knownp_2 && splicesites_iit) { | |
6568 | shortexon->siteA_pos,/*tag*/"label_1"); | |
6569 | } | |
6570 | if (shortexon->siteD_knownp && splicesites_iit) { | |
6121 | 6571 | print_splicesite_labels(fp,shortexon,donor_typeint, |
6122 | shortexon->chimera_pos_2,/*tag*/"label_2"); | |
6572 | shortexon->siteD_pos,/*tag*/"label_2"); | |
6123 | 6573 | } |
6124 | 6574 | } |
6125 | 6575 |
0 | /* $Id: substring.h 195961 2016-08-08 16:36:34Z twu $ */ | |
0 | /* $Id: substring.h 196273 2016-08-12 15:15:06Z twu $ */ | |
1 | 1 | #ifndef SUBSTRING_INCLUDED |
2 | 2 | #define SUBSTRING_INCLUDED |
3 | 3 | |
17 | 17 | #include "junction.h" |
18 | 18 | #include "intlist.h" |
19 | 19 | #include "doublelist.h" |
20 | #include "list.h" | |
20 | 21 | #ifdef LARGE_GENOMES |
21 | 22 | #include "uint8list.h" |
22 | 23 | #else |
31 | 32 | |
32 | 33 | extern char * |
33 | 34 | Endtype_string (Endtype_T endtype); |
35 | ||
36 | extern char * | |
37 | Trimaction_string (Trimaction_T trimaction); | |
34 | 38 | |
35 | 39 | extern void |
36 | 40 | Substring_setup (bool print_nsnpdiffs_p_in, bool print_snplabels_p_in, |
60 | 64 | int minlength, int sensedir); |
61 | 65 | |
62 | 66 | extern T |
63 | Substring_new_ambig (int querystart, int queryend, int splice_pos, int querylength, | |
64 | Chrnum_T chrnum, Univcoord_T chroffset, | |
65 | Univcoord_T chrhigh, Chrpos_T chrlength, | |
66 | bool plusp, int genestrand, | |
67 | Substring_new_ambig_D (int querystart, int queryend, int splice_pos, int querylength, | |
68 | Chrnum_T chrnum, Univcoord_T chroffset, | |
69 | Univcoord_T chrhigh, Chrpos_T chrlength, | |
70 | bool plusp, int genestrand, | |
67 | 71 | #ifdef LARGE_GENOMES |
68 | Uint8list_T ambcoords, | |
72 | Uint8list_T ambcoords, | |
69 | 73 | #else |
70 | Uintlist_T ambcoords, | |
74 | Uintlist_T ambcoords, | |
71 | 75 | #endif |
72 | Intlist_T amb_knowni, Intlist_T amb_nmismatches, Doublelist_T amb_probs, | |
73 | double amb_common_prob, bool amb_donor_common_p, bool substring1p); | |
76 | Intlist_T amb_knowni, Intlist_T amb_nmismatches, Doublelist_T amb_probs, | |
77 | double amb_common_prob, bool substring1p); | |
78 | ||
79 | extern T | |
80 | Substring_new_ambig_A (int querystart, int queryend, int splice_pos, int querylength, | |
81 | Chrnum_T chrnum, Univcoord_T chroffset, | |
82 | Univcoord_T chrhigh, Chrpos_T chrlength, | |
83 | bool plusp, int genestrand, | |
84 | #ifdef LARGE_GENOMES | |
85 | Uint8list_T ambcoords, | |
86 | #else | |
87 | Uintlist_T ambcoords, | |
88 | #endif | |
89 | Intlist_T amb_knowni, Intlist_T amb_nmismatches, Doublelist_T amb_probs, | |
90 | double amb_common_prob, bool substring1p); | |
74 | 91 | |
75 | 92 | extern Univcoord_T |
76 | 93 | Substring_set_unambiguous (double *donor_prob, double *acceptor_prob, Univcoord_T *genomicstart, Univcoord_T *genomicend, |
106 | 123 | extern Univcoord_T |
107 | 124 | Substring_left (T this); |
108 | 125 | extern Univcoord_T |
109 | Substring_splicecoord (T this); | |
110 | extern Chrpos_T | |
111 | Substring_chr_splicecoord (T this); | |
112 | extern int | |
113 | Substring_splicesites_knowni (T this); | |
114 | extern Univcoord_T | |
115 | 126 | Substring_splicecoord_A (T this); |
116 | 127 | extern Univcoord_T |
117 | 128 | Substring_splicecoord_D (T this); |
129 | extern Chrpos_T | |
130 | Substring_chr_splicecoord_D (T this); | |
131 | extern Chrpos_T | |
132 | Substring_chr_splicecoord_A (T this); | |
133 | extern int | |
134 | Substring_splicesitesD_knowni (T this); | |
135 | extern int | |
136 | Substring_splicesitesA_knowni (T this); | |
118 | 137 | |
119 | 138 | extern bool |
120 | 139 | Substring_plusp (T this); |
225 | 244 | Substring_amb_acceptor_prob (T this); |
226 | 245 | |
227 | 246 | extern double |
247 | Substring_siteD_prob (T this); | |
248 | extern double | |
228 | 249 | Substring_siteA_prob (T this); |
229 | extern double | |
230 | Substring_siteD_prob (T this); | |
231 | ||
232 | extern double | |
233 | Substring_chimera_prob (T this); | |
234 | extern double | |
235 | Substring_chimera_prob_2 (T this); | |
236 | extern int | |
237 | Substring_chimera_pos (T this); | |
238 | extern int | |
239 | Substring_chimera_pos_A (T this); | |
240 | extern int | |
241 | Substring_chimera_pos_D (T this); | |
250 | ||
251 | extern int | |
252 | Substring_siteD_pos (T this); | |
253 | extern int | |
254 | Substring_siteA_pos (T this); | |
255 | extern int | |
256 | Substring_siteN_pos (T this); | |
242 | 257 | extern int |
243 | 258 | Substring_chimera_sensedir (T this); |
244 | 259 | |
245 | 260 | extern bool |
246 | 261 | Substring_ambiguous_p (T this); |
262 | extern bool | |
263 | Substring_list_ambiguous_p (List_T list); | |
247 | 264 | extern int |
248 | 265 | Substring_nambcoords (T this); |
249 | 266 | extern Univcoord_T * |
295 | 312 | Chrnum_T chrnum, Univcoord_T chroffset, Univcoord_T chrhigh, Chrpos_T chrlength); |
296 | 313 | |
297 | 314 | extern List_T |
298 | Substring_sort_chimera_halves (List_T hitlist, bool ascendingp); | |
315 | Substring_sort_siteD_halves (List_T hitlist, bool ascendingp); | |
316 | extern List_T | |
317 | Substring_sort_siteA_halves (List_T hitlist, bool ascendingp); | |
318 | extern List_T | |
319 | Substring_sort_siteN_halves (List_T hitlist, bool ascendingp); | |
299 | 320 | |
300 | 321 | |
301 | 322 | extern Chrpos_T |
348 | 369 | Substring_add_intron (List_T pairs, T substringA, T substringB, int querylength, |
349 | 370 | int hardclip_low, int hardclip_high, int queryseq_offset); |
350 | 371 | |
351 | extern void | |
352 | Substring_trim_novel_spliceends (T substring1, T substringN, int *ambig_end_length_5, int *ambig_end_length_3, | |
353 | Splicetype_T *ambig_splicetype_5, Splicetype_T *ambig_splicetype_3, | |
354 | double *ambig_prob_5, double *ambig_prob_3, int *sensedir); | |
355 | ||
356 | 372 | #undef T |
357 | 373 | #endif |
358 | 374 |
0 | static char rcsid[] = "$Id: uniqscan.c 193877 2016-07-12 02:46:33Z twu $"; | |
0 | static char rcsid[] = "$Id: uniqscan.c 196438 2016-08-16 20:23:27Z twu $"; | |
1 | 1 | #ifdef HAVE_CONFIG_H |
2 | 2 | #include <config.h> |
3 | 3 | #endif |
58 | 58 | #include "getopt.h" |
59 | 59 | |
60 | 60 | |
61 | #define MAX_FLOORS_READLENGTH 300 | |
61 | 62 | #define MAX_QUERYLENGTH_FOR_ALLOC 100000 |
62 | 63 | #define MAX_GENOMICLENGTH_FOR_ALLOC 1000000 |
63 | 64 | |
392 | 393 | fprintf(stdout,"Sizes: off_t (%d), size_t (%d), unsigned int (%d), long int (%d), long long int (%d)\n", |
393 | 394 | (int) sizeof(off_t),(int) sizeof(size_t),(int) sizeof(unsigned int),(int) sizeof(long int),(int) sizeof(long long int)); |
394 | 395 | fprintf(stdout,"Default gmap directory: %s\n",GMAPDB); |
395 | fprintf(stdout,"Maximum read length: %d\n",MAX_READLENGTH); | |
396 | fprintf(stdout,"Maximum stack read length: %d\n",MAX_STACK_READLENGTH); | |
396 | 397 | fprintf(stdout,"Thomas D. Wu, Genentech, Inc.\n"); |
397 | 398 | fprintf(stdout,"Contact: twu@gene.com\n"); |
398 | 399 | fprintf(stdout,"\n"); |
447 | 448 | diagpool = Diagpool_new(); |
448 | 449 | cellpool = Cellpool_new(); |
449 | 450 | |
450 | floors_array = (Floors_T *) CALLOC(MAX_READLENGTH+1,sizeof(Floors_T)); | |
451 | floors_array = (Floors_T *) CALLOC(MAX_FLOORS_READLENGTH+1,sizeof(Floors_T)); | |
451 | 452 | /* Except_stack_create(); -- requires pthreads */ |
452 | 453 | |
453 | 454 | for (i = 0; i < 10; i++) { |
552 | 553 | |
553 | 554 | } |
554 | 555 | |
555 | for (i = 0; i <= MAX_READLENGTH; i++) { | |
556 | for (i = 0; i <= MAX_FLOORS_READLENGTH; i++) { | |
556 | 557 | if (floors_array[i] != NULL) { |
557 | 558 | Floors_free_keep(&(floors_array[i])); |
558 | 559 | } |
1301 | 1302 | nullgap,maxpeelback,maxpeelback_distalmedial, |
1302 | 1303 | extramaterial_end,extramaterial_paired,gmap_mode, |
1303 | 1304 | trigger_score_for_gmap,gmap_allowance,max_gmap_pairsearch, |
1304 | max_gmap_terminal,max_gmap_improvement,antistranded_penalty); | |
1305 | max_gmap_terminal,max_gmap_improvement,antistranded_penalty, | |
1306 | MAX_FLOORS_READLENGTH); | |
1305 | 1307 | Substring_setup(/*print_nsnpdiffs_p*/false,/*print_snplabels_p*/false, |
1306 | 1308 | /*show_refdiff_p*/false,snps_iit,snps_divint_crosstable, |
1307 | 1309 | genes_iit,genes_divint_crosstable, |
1321 | 1323 | Pair_setup(trim_mismatch_score,trim_indel_score,/*gff3_separators_p*/false,/*sam_insert_0M_p*/false, |
1322 | 1324 | /*force_xs_direction_p*/false,/*md_lowercase_variant_p*/false, |
1323 | 1325 | /*snps_p*/snps_iit ? true : false,/*print_nsnpdiffs_p*/snps_iit ? true : false, |
1324 | Univ_IIT_genomelength(chromosome_iit,/*with_circular_alias*/false)); | |
1326 | Univ_IIT_genomelength(chromosome_iit,/*with_circular_alias*/false), | |
1327 | /*gff3_phase_swap_p*/false); | |
1325 | 1328 | Stage3_setup(/*splicingp*/novelsplicingp == true || knownsplicingp == true,novelsplicingp, |
1326 | 1329 | /*require_splicedir_p*/false,splicing_iit,splicing_divint_crosstable, |
1327 | 1330 | donor_typeint,acceptor_typeint,splicesites,altlocp,alias_starts,alias_ends, |
0 | /* $Id: univdiag.h 195760 2016-08-04 00:12:04Z twu $ */ | |
0 | /* $Id: univdiag.h 196273 2016-08-12 15:15:06Z twu $ */ | |
1 | 1 | #ifndef UNIVDIAG_INCLUDED |
2 | 2 | #define UNIVDIAG_INCLUDED |
3 | 3 |
389 | 389 | LN_S = @LN_S@ |
390 | 390 | LTLIBOBJS = @LTLIBOBJS@ |
391 | 391 | LT_SYS_LIBRARY_PATH = @LT_SYS_LIBRARY_PATH@ |
392 | MAINT = @MAINT@ | |
392 | 393 | MAKEINFO = @MAKEINFO@ |
393 | 394 | MANIFEST_TOOL = @MANIFEST_TOOL@ |
394 | MAX_READLENGTH = @MAX_READLENGTH@ | |
395 | MAX_STACK_READLENGTH = @MAX_STACK_READLENGTH@ | |
395 | 396 | MKDIR_P = @MKDIR_P@ |
396 | 397 | MPICC = @MPICC@ |
397 | 398 | MPILIBS = @MPILIBS@ |
506 | 507 | |
507 | 508 | .SUFFIXES: |
508 | 509 | .SUFFIXES: .log .test .test$(EXEEXT) .trs |
509 | $(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(am__configure_deps) | |
510 | $(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(am__configure_deps) | |
510 | 511 | @for dep in $?; do \ |
511 | 512 | case '$(am__configure_deps)' in \ |
512 | 513 | *$$dep*) \ |
530 | 531 | $(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) |
531 | 532 | cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh |
532 | 533 | |
533 | $(top_srcdir)/configure: $(am__configure_deps) | |
534 | $(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps) | |
534 | 535 | cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh |
535 | $(ACLOCAL_M4): $(am__aclocal_m4_deps) | |
536 | $(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps) | |
536 | 537 | cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh |
537 | 538 | $(am__aclocal_m4_deps): |
538 | 539 | align.test: $(top_builddir)/config.status $(srcdir)/align.test.in |
230 | 230 | LN_S = @LN_S@ |
231 | 231 | LTLIBOBJS = @LTLIBOBJS@ |
232 | 232 | LT_SYS_LIBRARY_PATH = @LT_SYS_LIBRARY_PATH@ |
233 | MAINT = @MAINT@ | |
233 | 234 | MAKEINFO = @MAKEINFO@ |
234 | 235 | MANIFEST_TOOL = @MANIFEST_TOOL@ |
235 | MAX_READLENGTH = @MAX_READLENGTH@ | |
236 | MAX_STACK_READLENGTH = @MAX_STACK_READLENGTH@ | |
236 | 237 | MKDIR_P = @MKDIR_P@ |
237 | 238 | MPICC = @MPICC@ |
238 | 239 | MPILIBS = @MPILIBS@ |
356 | 357 | all: all-am |
357 | 358 | |
358 | 359 | .SUFFIXES: |
359 | $(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(am__configure_deps) | |
360 | $(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(am__configure_deps) | |
360 | 361 | @for dep in $?; do \ |
361 | 362 | case '$(am__configure_deps)' in \ |
362 | 363 | *$$dep*) \ |
380 | 381 | $(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) |
381 | 382 | cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh |
382 | 383 | |
383 | $(top_srcdir)/configure: $(am__configure_deps) | |
384 | $(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps) | |
384 | 385 | cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh |
385 | $(ACLOCAL_M4): $(am__aclocal_m4_deps) | |
386 | $(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps) | |
386 | 387 | cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh |
387 | 388 | $(am__aclocal_m4_deps): |
388 | 389 | gmap_compress.pl: $(top_builddir)/config.status $(srcdir)/gmap_compress.pl.in |
37 | 37 | } |
38 | 38 | @exons = (); |
39 | 39 | $sortp = 0; |
40 | $gene_name = get_info(\@info,"gene_name","gene_id"); | |
40 | $gene_name = cat_info(\@info,"gene_id","gene_name"); | |
41 | 41 | $last_transcript_id = $transcript_id; |
42 | 42 | $chr = $fields[0]; |
43 | 43 | $strand = $fields[6]; |
106 | 106 | return "NA"; |
107 | 107 | } |
108 | 108 | |
109 | sub cat_info { | |
110 | my $info = shift @_; | |
111 | my @desired_keys = @_; | |
112 | my @result = (); | |
113 | ||
114 | foreach $desired_key (@desired_keys) { | |
115 | foreach $item (@ {$info}) { | |
116 | ($key,$value) = $item =~ /(\S+) (.+)/; | |
117 | if ($key eq $desired_key) { | |
118 | push @result,$value; | |
119 | } | |
120 | } | |
121 | } | |
122 | ||
123 | if ($#result < 0) { | |
124 | print STDERR "Cannot find " . join(" or ",@desired_keys) . " in " . join("; ",@ {$info}) . "\n"; | |
125 | return "NA"; | |
126 | } else { | |
127 | return join(" ",@result); | |
128 | } | |
129 | } | |
130 | ||
109 | 131 | |
110 | 132 | sub get_info_optional { |
111 | 133 | my $info = shift @_; |
107 | 107 | } |
108 | 108 | @exons = (); |
109 | 109 | $sortp = 0; |
110 | $gene_name = get_info(\@info,"gene_name","gene_id"); | |
110 | $gene_name = get_info(\@info,"gene_id","gene_name"); | |
111 | 111 | $last_transcript_id = $transcript_id; |
112 | 112 | $chr = $fields[0]; |
113 | 113 | $strand = $fields[6]; |