#
# This is a constraints file for the transition matrix file
# trans_shadow_partial_utr.pbl.
#
# This configuration file can be used when optimizing the transition matrix parameters
# for a new species or setting. It is an argument to
# optimize_augustus.pl when run in the transition matrix optimization
# mode, e.g. 
#
# optimize_augustus.pl --species=myspecies
# --opt_trans_matrix=/path/augustus/config/species/myspecies/myspecies_trans_shadow_partial_utr.pbl
# --matrix_constraints=constraints_shadow_partial_utr.txt train.gb
#
# Mario Stanke (23.4.2007)
#
#
# --------------------------------------------------------------------
# This is a list of the states that optimize_augustus.pl should try to optimize. For
# the state numbers see the corresponding states file 'states_shadow_utr.cfg'.
# States not in this list are simply skipped by optimize_augustus.pl.
# Use a list with one state number per line or use the keyword 'all'.
# The order determines the order in which the optimization cycles
# through the states.
#
# States to optimize, one state number per line, visited in the order listed.
[TRY]
0  # intergenic region
1  # single exon (CDS)
8  # terminal exon (CDS)
# NOTE(review): 'ass' presumably abbreviates 'acceptor splice site' (frames 0-2)
# -- confirm against the states file.
13 # ass 0
18 # ass 1
23 # ass 2
24 # 5' UTR single exon
26 # 5' UTR intron single base
32 # 3' UTR intron single base
65 # reverse 3' UTR single exon
66 # reverse 3' UTR initial exon
# the following states tune the overall frequency of exons (if not normed);
# as shipped, states 2-5 are not listed under [NORMED]
2  # initial exon 0
3  # initial exon 1
4  # initial exon 2
5  # internal exon 0
# --------------------------------------------------------------------
# This is a list of states s, such that the transition probabilities
# out of s are normed:
#
# M[s][0] + M[s][1] + ... + M[s][71] = const. 
#
# The constant is computed from the original
# trans_shadow_partial_utr.pbl file. It is 1.0 with few exceptions.
# For unnormed states the M[s][.] values don't form a probability
# distribution anymore. If you don't care -- AUGUSTUS doesn't.
# Use a list with one state number per line or use the keyword 'all'.
#
# States whose outgoing transition probabilities must keep a constant sum.
# NOTE(review): state 36 is listed here but not under [TRY], while states
# 2-5, 65 and 66 are under [TRY] but not here -- confirm this is intended.
[NORMED]
0
1
8
13
18
23
24
26
32
36
# --------------------------------------------------------------------
# This section is for setting constraints between transition probabilities,
# such as those suggested by strand symmetry (or by treating transitions in
# all reading frames the same). Theoretically, i.e. with infinite and
# representative training data, this should not be necessary. However, in the real finite
# world this is a little safeguard against overfitting.
#
# Constraints tying transition probabilities together; (s,t) denotes the
# transition from state s to state t.
[BINDINGS]
(0,24)+(0,25)=(0,65)+(0,70)     # - the same frequency of genes on both strands
(1,31)=(8,31)                   # - same freq. of spliced 3'UTR after single CDS or multi CDS gene
(13,8)=(18,8)                   # - Prob. of terminating 
(18,8)=(23,8)                   # CDS is independent
(13,8)=(23,8)                   # of reading frame.
(24,1)=(29,1)                   # - reading frame 
(24,2)=(29,2)                   # and multi/single CDS
(24,3)=(29,3)                   # independent of
(24,4)=(29,4)                   # spliced/unspliced 5' UTR
(2,10)=(3,15)                   # - equal exit
(3,15)=(4,20)                   # probs out of initial
(2,10)=(4,20)                   # exons for the three frames
(5,10)=(6,15)                   # - equal exit
(5,10)=(7,20)                   # probs out of internal exons for the three frames
(5,10)=(38,48)                  # and
(5,10)=(39,53)                  # for
(5,10)=(40,58)                  # both strands
(36,59)=(37,59)                 # - same ratio of spliced/unspliced 5'UTR after single CDS or multi CDS gene
# the Markov chain of phases of successive introns should
# be the time-reversed chain for the reverse strand
# NOTE(review): the right-hand side of the next line has one more '(' than ')'
# (the 'reverse(' call is never closed). Confirm how optimize_augustus.pl parses
# this line before adding the missing ')'.
MC(((55,40),(55,38),(55,39)),((45,40),(45,38),(45,39)),((50,40),(50,38),(50,39)))=reverse(MC(((13,5),(13,6),(13,7)),((18,5),(18,6),(18,7)),((23,5),(23,6),(23,7)))
# The following bindings are disabled (commented out):
#(26,28)/(26,29)=(27,28)/(27,29) # same ratio of terminal/internal for transitions out of 5' UTR intron
#(32,34)/(32,35)=(33,34)/(33,35) # same ratio of terminal/internal for transitions out of 3' UTR intron
#(0,24)/(0,25)=(36,59)/(36,64)   # same ratio of spliced/unspliced 5'UTR on both strands
