// Universal Sequence Maps (hoding Object for CGR operations 

console.log('CGR toolbox :-)');

// Overloading String and Array - watch out!

String.prototype.reverse=function(){ // sort characters of a string
    return this.split('').reverse().toString().replace(/,/g,'');
}

String.prototype.sort=function(){ // sort characters of a string
    return this.split('').sort().toString().replace(/,/g,'');
}

Array.prototype.max=function(){ // returns maximum value
    return this.reduce(function(x1,x2){if(x1>x2){return x1}else{return x2}});
}

Array.prototype.min=function(){ // returns maximum value
    return this.reduce(function(x1,x2){if(x1<x2){return x1}else{return x2}});
}

Array.prototype.sum=function(){ // returns sum of all values
    return this.reduce(function(x1,x2){return x1+x2});
}

Array.prototype.transpose=function(){ // written for arrays of arrays
    if (!Array.isArray(this[0])){
        M=this.map(function(x){return [x]})}
    else {M=this};
    var T=[];
    for (var i=0;i<M[0].length;i++){
        T[i]=[];
        for (var j=0;j<M.length;j++){T[i][j]=M[j][i]}
    }
    if ((Array.isArray(T[0]))&&(T[0].length==1)){T=T.map(function(x){return x[0]})}
    return T;
}

// Universal Sequence Mapping (USM)

usm = function (seq,abc,pack,seed){ // Universal Sequence Map
	
	this.loadFasta=function(url,callback,abc){// load long sequences from a FastA file
		// for example
		// url = 'ftp://ftp.ncbi.nlm.nih.gov/genomes/Bacteria/Escherichia_coli_K_12_substr__DH10B_uid58979/NC_010473.fna' <-- big bacteria
		// url='ftp://ftp.ncbi.nlm.nih.gov/genomes/Bacteria/Acinetobacter_ADP1_uid61597/NC_005966.fna';
		// url='ftp://ftp.ncbi.nlm.nih.gov/genomes/Bacteria/Streptococcus_pneumoniae_R6_uid57859/NC_003098.fna'
		// url='ftp://ftp.ncbi.nlm.nih.gov/genomes/Viruses/Streptococcus_phage_2972_uid15254/NC_007019.fna' <-- phage
		// u = new usm;u.loadFasta(url,function(x){console.log(x.length)})
		// if default proxy doesn't work try this one: jmat.webrwUrl='http://webrw.no.de'
		thisUsm = this;
		if (!!abc){this.abc=abc}
		//jmat.webrwUrl='http://webrw.no.de';
		//console.log('using proxy '+jmat.webrwUrl+' to get fastA file '+url);
		jmat.get(url,function(x){ // encode sequences
			if (!!callback){callback(x)} // in case this function was called with a callback do that first
			// let's encode it now
			console.log(x[0]); // fastA head identification, everything else should be a long sequence
			//x=x.splice(0,100); // <-- while debugging
			var n = x.length;
			thisUsm.seq = x.splice(1,n).reduce(function(a,b){return a+b}); // concatenate whole sequence
			x=''; // to free memory
			var nn = thisUsm.seq.length;
			console.log('... found '+nn+' units divited in '+n+' segments,');
			thisUsm.encodeLong(); // the seq will be picked from this.seq
			//thisUsm.encode();
			}
		)
		return 'using proxy '+jmat.webrwUrl+' to get fastA file '+url+' ...'
	}
	
	this.encodeLong = function(seq,abc,pack,seed){ // encoding long sequences by writting directly to the usm instance
		if(!!seq){this.seq=seq}
		if(!!abc){this.abc=abc}
        if (!this.seq){throw ('Sequence not provided')}
        if (!this.abc){
			console.log('find alphabet ...');
			this.abc=this.alphabet();	
		};
		console.log('... alphabet: '+this.abc);
        this.cube=[];
		console.log('packing USM space ...')
        this.str2cube(pack,true);
		console.log('...',this.cube);
		console.log('filling USM space ...');
        var m = this.cube.length, n = this.seq.length, i = 0;
        this.cgrForward = [];this.cgrBackward = [];
		//for(i=0;i<n;i++){this.cgrForward[i]=[[0]];this.cgrBackward[i]=[[0]]}
		for(i=0;i<m;i++){this.cgrForward[i]=[];this.cgrBackward[i]=[]}
        for (i=0;i<m;i++){
			console.log((i*2+1)+'/'+(this.cube.length*2)+' mapping axis <'+this.cube[i]+'> forward');
            this.cgrLong(i,'cgrForward');
			console.log((i*2+2)+'/'+(this.cube.length*2)+' mapping axis <'+this.cube[i]+'> backward');
			this.cgrLong(i,'cgrBackward');
            this.bin[i]=[]; // free memory
        }
		console.log('packing CGR coordinates ...')
		console.log('... forward ...')
		var c=[];
		for(i=0;i<n;i++){c[i]=[];for(j=0;j<m;j++){c[i][j]=this.cgrForward[j][i]}}
		this.cgrForward=c;
		console.log('... backward ...')
		var c=[];
		for(i=0;i<n;i++){c[i]=[];for(j=0;j<m;j++){c[i][j]=this.cgrBackward[j][i]}}
		this.cgrBackward=c.reverse();
		var c=[];
		console.log('USMapping done');
    }

	this.cgrLong=function(ii,direction,s){ // one dimension at a time
		var bin = this.bin[ii];
		var n = bin.length;//, bins=[], k=10; // binning threads
		if(direction=='cgrBackward'){bin.reverse();var c = this.cgrBackward[ii]}
		else{var c = this.cgrForward[ii]}
		if (!s){ // seed
			var s=Math.random();for(var i=n-128;i<n;i++){s=s+(bin[i]-s)/2}
		}
		//this[direction][ii]=[];
		//c[0][ii]=s+(bin[0]-s)/2;
		c[0]=s+(bin[0]-s)/2;
		for(var i=1;i<n;i++){
			//c[i][ii]=c[i-1][ii]+(bin[i]-c[i-1][ii])/2;
			c[i]=c[i-1]+(bin[i]-c[i-1])/2;
		}
		return 'done'
	}

    this.alphabet=function(seqString){//extracts alphabet
        if (!seqString){seqString=this.seq} //uses own string if argument not provided
        this.abc='';
        for (var i=0;i<seqString.length;i++){
            if (!this.abc.match(new RegExp(seqString[i]))){this.abc+=seqString[i]}
        }
        return this.abc.sort(); // using overloaded String.sort()
    }
    
    this.str2cube = function(pack,show){
        var m = this.abc.length;var n = this.seq.length;
        if (!pack){pack='compact'} // default packing method
        this.pack=pack;
        this.bin=[];
        switch (pack){
        case 'sparse':
            for (var j=0;j<m;j++){
                this.cube[j]=this.abc[j];
				this.bin[j]=this.seq.split('').map(function(si){
					if (si===this.abc[j]){return 0}
	                else {return 1}
				})
            }
            break;
        case 'compact':
            var L = Math.ceil(Math.log(m)/Math.log(2)); // map dimension
            var mm=Math.pow(2,L); // maximum length of this alphabet
            for (var j=0;j<L;j++){
                //this.bin[j]=[];
                var abc='';mm=mm/2;
                for (var i=0;i<m;i=i+mm*2){
                    abc+=this.abc.slice(i,i+mm);
                }
                this.cube[j]=abc;
				if (show){console.log((j+1)+'/'+L+' filling axis <'+abc+'>')}
				this.bin[j]=this.seq.split('').map(function(si){
				    if (abc.match(new RegExp(si))){return 0}
	                else {return 1}	
				})
            }
            break;
        //default :
        //  this.alphabet
        }
    }
        
    this.cgr = function(bin,s,ith){ // CGR with recursive seed
		if (!ith){ith=0}
        var n = bin.length;
		if (!s){s=bin[bin.length-1]} // start seed with last value of bin
        var y=[];
		y[0]=s+(bin[0]-s)/2;
		for(var i=1;i<n;i++){
			y[i]=y[i-1]+(bin[i]-y[i-1])/2;
		}
		// check recursive seed
		if ((s!=y[y.length-1])&(ith<64)){y=this.cgr(bin,y[y.length-1],ith+1)}
        return y;
    }

	this.cgr2 = function(bin,seed){ // CGR with 1/2 seed
		var y=[1/2+(bin[0]-1/2)/2];
		for(var i=1;i<bin.length;i++){y[i]=y[i-1]+(bin[i]-y[i-1])/2}
		return y;
	}

    this.transpose = function(M){
        var T=[];
		//try{
		for (var i=0;i<M[0].length;i++){
            T[i]=[];
            for (var j=0;j<M.length;j++){T[i][j]=M[j][i]}
        }
		//}
		//catch(err){
		//console.log(err);
		//  lala = 4;
		//}
        return T;
    }

	this.transpose2 = function(M){
        M[0].map(function(mi,i){var MM=[];})
    }

    this.encode = function(seq,abc,pack,seed){
        if (!seq){seq=this.seq}
        else {this.seq=seq}
        if (!this.seq){throw ('Sequence not provided')}
        if (abc){if(abc.length==0){var abc = undefined}} // in case abc=''
        if (abc){this.abc=abc}
        //if (!this.abc){this.abc=''}
        if (!this.abc){this.abc=this.alphabet()};
        var m = this.abc.length;var n = this.seq.length;
        this.cube=[];
        this.str2cube(pack);
        this.cgrForward = [];
        this.cgrBackward = [];
		if (!seed){
        	for (var i=0;i<this.bin.length;i++){
            	this.cgrForward[i]=this.cgr(this.bin[i]);
            	this.cgrBackward[i]=this.cgr(this.bin[i].slice().reverse()).slice().reverse();
        	}
		}
		else{ // seeded CGR
			for (var i=0;i<this.bin.length;i++){
            	this.cgrForward[i]=this.cgr2(this.bin[i],seed);
            	this.cgrBackward[i]=this.cgr2(this.bin[i].slice().reverse(),seed).slice().reverse();
        	}
		}
        //delete this.bin; // comment out if .bin is of no use <---<---<--- NOTE!
        this.cgrForward=this.transpose(this.cgrForward);
        this.cgrBackward=this.transpose(this.cgrBackward);
		this.usm=[];
		for (var i=0;i<n;i++){ // In a serious application .usm is all we'd need to keep .cgr--- etc could all be deleted
			this.usm[i]=[this.cgrForward[i],this.cgrBackward[i]];
		}
    }

    this.decodeBin = function(x,n){// decompose single coordinate, x, into a binary array of length <= n
        if (!n){n=Infinity}
        var y = [];var i = 0;
        x=x*2;var z=[];
        y[0]=x; // just in case x is 0, 1 or even 1/2 (impossible) y is still populated
        while ((x!=Math.round(x))&(i<n)){
            if (x>1){y[i]=1;x=x-1}
            else{y[i]=0}
            x = x*2;i++;
        }
        return y;
    }

    this.bin2int = function (B){//converts binary vector into integer
        return B.slice().reverse().map(function(x,i){return x*Math.pow(2,i)}).reduce(function(a,b){return a+b});
    }

    this.decode = function(xy,n){// decode numerical coordinates
        var bin2int = this.bin2int;
        var decodeBin = this.decodeBin;
        var abc = this.abc;
        var bb = xy.map(function(x){return decodeBin(x)});
        bb = this.transpose(bb).map(function(x){return bin2int(x)});
        bb = bb.map(function(x){return abc[x]});
        return bb.toString().replace(/,/g,'');
    }

    // run USM map automatically is a sequence is provided
    
    this.L = function (a,b){ // distance between two coordinates
        var d=0;
        while((Math.pow(2,d)!=Infinity)&Math.round(a*Math.pow(2,d))==Math.round(b*Math.pow(2,d))){d++}
		if(Math.pow(2,d)==Infinity){d=64} // stop at the numerical resolution
        return d;
    }

    this.distCGR = function (a,b){ // distance between two CGR positions, a and b
        //var ab=this.transpose([a,b]); // such that each position is an array of elements with two coordinates, one from each sequence
        var dist = this.L;
        return this.transpose([a,b]).map(function(x){return dist(x[0],x[1])}).min();
    }
    
    this.mapReduce = function (x,map,reduce){
        return reduce(x.map(map))
    }

    this.distProfile= function (s){// Distance to another sequence, s
        if (typeof(s)=='string'){
            s=new usm(s,this.abc,this.pack);
        }
        if (s.abc!==this.abc){throw('unequal alphabets')}
        if (s.pack!==this.pack){throw('unequal encode packing')}
        //var dist = this.distCGR;
        //var distCoord
        //var dist = function(a,b){return this.transpose([a,b]).map(function(x){return this.L(x[0],x[1])}).min()};
        var f = this.cgrForward.map(function(x){var x0=x; return s.cgrForward.map(function(x){return s.distCGR(x0,x)}).sum()});
        var b = this.cgrBackward.map(function(x){var x0=x; return s.cgrBackward.map(function(x){return s.distCGR(x0,x)}).sum()});

        return [f,b].transpose().sum();
    }

	this.dist = function (x,y){ // Equation 5: distance between two usm coordinates for a position, i
		                        // each provided as a two element Array [cgrForward[i],cgrBackward[i]]
		var d=this.distCGR(x[0],y[0])+this.distCGR(x[1],y[1])-1;
		if (d<0){d=0}
		return d
	}

	this.distMap=function (sprobe,sbase){// Distance Map to a new probing sequence, sprobe
		if (typeof(sprobe)=='string'){sprobe = new usm(sprobe,this.abc,this.pack)} // in case the actual sequence was inputed
		if (!sbase){sbase=this} //
		return sbase.usm.map(function(x){return sprobe.usm.map(function(y){return sbase.dist(x,y)})});
	}
	
	this.align=function(sprobe,sbase){ //align sequence sprobe to this sequence
		if (typeof(sprobe)=='string'){sprobe = new usm(sprobe,this.abc,this.pack)} // in case the actual sequence was inputed
		if (!sbase){sbase=this} //
		// start by considering a complete match and zoom in until such a subset is found
		var n = sprobe.seq.length , nn = sbase.seq.length;
		var A={posBase:[0],posProbe:[0],match:[0],ind:0}
		this.alignUsm=function(posStart,posEnd){ // defined here to capture align closure
			console.log([posStart,posEnd]);
			var inc = Math.floor((posEnd-posStart)/2);// increment
			var i=posStart+inc;
			var res=32;//some resolution
			//this.distCGR(x[0],y[0])+this.distCGR(x[1],y[1]) Eq 5
			var d = sbase.cgrForward.map(function(cf,ii){return sbase.distCGR(cf,sprobe.cgrForward[i])+sbase.distCGR(sbase.cgrBackward[ii],sprobe.cgrBackward[i])});
			var mm = jmat.max2(d); // lets code it here so we don't need jmat just yet:
			//var mm = d.reduce(function(a,b){return Math.max(a,b)});
			if(mm[0]>0){mm[0]-=1};// var ind = mm[0],dmax=mm[1];
			var j=A.posProbe.length;
			//var dF=sbase.distCGR(sbase.usm[mm[1]][0],sprobe.usm[i][0]); // forward distance
			var dF=sbase.distCGR(sbase.cgrForward[mm[1]],sprobe.cgrForward[i]); // forward distance
			if(dF>0){dF-=1}
			A.posBase[j]=mm[1]-dF; // using foward CGR to find start position
			if(A.posBase[j]<0){mm[0]-=(dF-mm[1]);dF=mm[1];A.posBase[j]=0} // check lower boundary for Base
			if((A.posBase[j]+dF)>nn){mm[0]-=A.posBase[j]+mm[0]-nn} // check upper boundary for Base
			A.posProbe[j]=i-dF;
			if(A.posProbe[j]<0){mm[0]-=(dF-i);dF=i;A.posProbe[j]=0;A.posBase[j]=mm[1]-dF;} // check lower boundary for Probe
			if((A.posProbe[j]+mm[0])>n){mm[0]-=A.posProbe[j]+mm[0]-n} // check upper boundary of Probe
			if(mm[0]>res){ // check if numerial resolution might have been exceeded
							var dFi=sbase.distCGR(sbase.cgrForward[A.posBase[j]],sprobe.cgrForward[A.posProbe[j]])-1; // additional forward distance
							while(dFi>0){ // if some was found
								A.posProbe[j]-=dFi;A.posBase[j]-=dFi;
								mm[0]+=dFi;
								dFi=sbase.distCGR(sbase.cgrForward[A.posBase[j]],sprobe.cgrForward[A.posProbe[j]])-1;
							}
							dFi=sbase.distCGR(sbase.cgrBackward[A.posBase[j]+mm[0]-1],sprobe.cgrBackward[A.posProbe[j]+mm[0]-1])-1; // backward distance at the end of match segment
							while(dFi>0){ // if some was found
								mm[0]+=dFi;
								dFi=sbase.distCGR(sbase.cgrBackward[A.posBase[j]+mm[0]-1],sprobe.cgrBackward[A.posProbe[j]+mm[0]-1])-1;
							}
						}
			A.match[j]=mm[0];
			if(mm[0]>(A.match[A.ind])){A.ind=j}; // if this is the best match yet
			if(A.match[A.ind]<inc){ // if best match is shorter than increment, zoom into its halves
				inc = Math.floor(inc/2);
				this.alignUsm(posStart,inc);
				this.alignUsm(inc+1,posEnd);
			}
		}
		this.alignUsm(0,n);
		console.log('largest identical segment has length '+A.match[A.ind]+' and aligns with position '+A.posBase[A.ind]+' in base sequence and position '+A.posProbe[A.ind]+' in probe sequence');
		return A
		//sbase.usm.map(function(x){return sprobe.usm.map(function(y){return sbase.dist(x,y)})});
	}

    this.alignQ = function (sprobe, sbase) {
     // This function provides an asynchronous wrapper for the usual
     // `this.align` method using Quanah (http://wilkinson.github.com/quanah/).
        if (Object.prototype.hasOwnProperty('Q') === false) {
            throw new Error('Quanah is not loaded.');
        }
        var Q, that, y;
        Q = Object.prototype.Q;
        that = this;                    //- the current USM object
        y = Q.avar();
        y.Q(function (evt) {
            if (that.hasOwnProperty('cgrBackward') === false) {
                window.setTimeout(y.revive, 0);
                return evt.stay('Waiting for indexing to finish ...');
            }
            y.val = that.align(sprobe, sbase);
            return evt.exit();
        });
        return y;
    };

    if (seq){// find out if this is a sequence or the url of a fastA file with one
		if(seq.length>15){ // it could be a url
			if(!!seq.slice(0,10).match(/:\/\//)){
				console.log('using proxy '+jmat.webrwUrl+'\nto get fastA file '+seq+' ...');
				this.loadFasta(seq,function(x){console.log('... file loaded ...')}, abc)}
			else{this.encode(seq,abc,pack)}
		}
		else{this.encode(seq,abc,pack,seed)}
	}
	else {if (abc){this.encode(abc,abc,pack,seed)}} // if alphabet is provided then identify cube anyway to enable decoding

}

usmDist=function(s1,s2,opt){ // Compares two USM encoded sequences
	if (!opt){opt='matrix'};
	switch (opt)
	{
	case 'matrixForward':
		return s1.cgrForward.map(function(x){var x0=x; return s2.cgrForward.map(function(x){return s2.distCGR(x0,x)})});
		break;

	case 'matrixBackward':
		return s1.cgrBackward.map(function(x){var x0=x; return s2.cgrBackward.map(function(x){return s2.distCGR(x0,x)})});
		break;

	case 'matrix':
		//var f1,b1,f2,b2;
		//create two mirror (forward and backward) version of s1 and s2
		fb1=s1.transpose([s1.cgrForward,s1.cgrBackward]);
		fb2=s2.transpose([s2.cgrForward,s2.cgrBackward]);
		return fb1.map(function(x){var x0=x; return fb2.map(function(x){var res=s1.distCGR(x0[0],x[0])+s1.distCGR(x0[1],x[1]);if(res>1){res=res-1};return res})});
		break;

	case 'sum':
		break;

	case 'max':
		break;

	case 'min':
		break;


	}
}

// load jmat
if(typeof(jmat)=='undefined'){
	var s=document.createElement('script');
	s.src='https://usm.github.io/jmat.js';
	document.head.appendChild(s);
}