#!/usr/bin/perl

require GFFTransform;

$usage .= "$0 - transform GFF file co-ordinate system\n";
$usage .= "\n";
$usage .= "Usage: $0 [-sortedname] [-pair] [-pairfeature <regexp>] <transformation file> [<input file>]\n";
$usage .= "\n";
$usage .= "Use -sortedname to sort output by <seqname> and <start> (input file must be specified and sorted by <seqname> field)\n";
$usage .= "Use -pair switch to also transform first three words of <group> field as though they were co-ords\n";
$usage .= "Use -pairfeature to selectively transform fields with a match to <regexp> in the <feature> field\n";
$usage .= "\n";
$usage .= "WARNING - currently assumes a many=>one mapping from oldseqname=>newseqname\n";
$usage .= "\n";

$| = 1;

while (@ARGV) {
    last unless $ARGV[0] =~ /^-/;
    $opt = lc shift;
    if ($opt eq "-sortedname") { $sortedname = 1 }
    elsif ($opt eq "-pair") { $pair = 1 }
    elsif ($opt eq "-pairfeature") { defined($pairfeature = shift) or die $usage; push @pairfeature, $pairfeature }
    else { die "$usage\nUnknown option: $opt\n" }
}

if (@ARGV==1 && !$sortedname) { push @ARGV, '-' }

@ARGV==2 or die $usage;
($transformation,$transformee) = @ARGV;

read_transformation($transformation);

open transformee or die "$0: couldn't open $transformee: $!";

if ($sortedname) {

    undef $lines;
    while ($tell = tell(transformee), $_ = <transformee>) {
	s/#.*//;
	next unless /\S/;
	@f = split /\t/, $_, 9;
	unless (exists $pos{$f[0]}) {
	    if (defined $lines) { print STDERR "\t$lines lines\n"; $lines = 0 }
	    print STDERR "Indexing $f[0]\tat $transformee pos $tell...";
	    $pos{$f[0]} = $tell;
	}
	check_validity($f[0]);
	if ($pair || (@pairfeature && grep($f[2] =~ /$_/,@pairfeature))) {
	    @g = split /\s+/, $f[8], 4;
	    check_validity($g[0]);
	}
	++$lines;
    }
    if (defined $lines) { print STDERR "\t$lines lines\n" }
    print STDERR "Indexing done; sorting sequence list ...\n";
    
    @seqname = sort { $name{$a} cmp $name{$b} or $start{$a} <=> $start{$b} or $end{$a} <=> $end{$b} } keys %name;
    
    warn "Sorting done.\n";
    
    foreach $seqname (@seqname) {
	print STDERR "Reading $seqname ...\n";
	next unless exists $pos{$seqname};
	seek transformee, $pos{$seqname}, 0;
	%start1 = %name2 = %start2 = ();
	while (<transformee>) {
	    last unless /^$seqname\t/;
	    ($text,$start1,$name2,$start2) = transform_gff($_);
	    $start1{$text} = $start1;
	    $name2{$text} = $name2;
	    $start2{$text} = $start2;
	}
	if (!defined($lastname1) || $name{$seqname} ne $lastname1) {
	    $lastname1 = $name{$seqname};
	    undef $laststart1;
	    undef $lastname2;
	    undef $laststart2;
	}
	foreach (sort { $start1{$a}<=>$start1{$b} || $name2{$a} cmp $name2{$b} || $start2{$a}<=>$start2{$b} } keys %start1) {
	    if (!defined($laststart1) || ($start1{$_} >= $laststart1 && $name2{$_} ge $lastname2 && $start2{$_} >= $laststart2)) {
		print;
		$laststart1 = $start1{$_};
		$lastname2 = $name2{$_};
		$laststart2 = $start2{$_};
	    }
	}
    }

} else {

    while (<transformee>) {
	($text,$start1,$name2,$start2) = transform_gff($_);
	print $text;
    }

}

sub transform_gff {
    my ($gfftext) = @_;
    chomp $gfftext;
    my @f = split /\t/, $gfftext, 9;
    my @g = split /\s+/, $f[8], 4;
    my ($name2,$start2);
    @f[0,3,4] = transform(@f[0,3,4]);
    if ($pair || (@pairfeature && grep($f[2] =~ /$_/,@pairfeature))) {
	@g[0,1,2] = transform(@g[0,1,2]);
	if ($f[3] > $f[4]) { @g[1,2] = @g[2,1] }
	$f[8] = "@g";
	($name2,$start2) = @g[0,1];
    }
    if ($f[3] > $f[4]) {
	@f[3,4] = @f[4,3];
	if ($f[6] eq "+") { $f[6] = "-" }
	elsif ($f[6] eq "-") { $f[6] = "+" }
    }

    (join("\t",@f)."\n", $f[3], $name2, $start2);
}