#!/usr/bin/env bash
#
# Nostoc rbcX workflow: BLAST the query against nt, download the hits
# (FASTA + GenBank), add sequences missing from the BLAST results,
# dereplicate, align, trim, build a tree and annotate it.
#
# Requires on PATH: blastn, blastdbcmd, usearch, mafft, trimal, phyml, perl.
# Requires the helper scripts in ../Scripts and, in the working directory,
# missing_seqs.txt and duplicates.txt.
#
# Lines marked "recorded:" are the results noted when the commands were
# originally run; they are kept as comments for comparison.

# Stop at the first failing step so later files are never built from
# partial or missing inputs.
set -euo pipefail

# E-mail address passed as the second argument to GetGB.pl / GetFasta.pl.
readonly email='heath.obrien@gmail.com'

# --- BLAST search ----------------------------------------------------------
# Tabular output; column 3 = sacc, 4 = slen, 12 = sstart, 13 = send.
blastn -query ../Ncommune_rbcX.fa -db nt -evalue 1e-180 \
  -max_target_seqs 3000 -out Nostoc_rbcX.bl \
  -outfmt '6 qseqid qlen sacc slen pident length mismatch gapopen qstart qend qframe sstart send sframe evalue bitscore'

# Number of distinct subject accessions.
cut -f3 Nostoc_rbcX.bl | sort -u | wc -l
# recorded: 310

# Hits whose subject sequence is longer than 3000 (slen > 3000).
awk '$4 > 3000' Nostoc_rbcX.bl
# recorded: 1 (CP001037)

# Hits with sstart > send.
awk '$12 > $13' Nostoc_rbcX.bl | wc -l
# recorded: 0

# --- Download hit sequences and GenBank records -----------------------------
# CP001037 (the long subject found above) is left out of the accession list.
cut -f3 Nostoc_rbcX.bl | grep -v CP001037 | sort >Nostoc_rbcX_acc.txt

blastdbcmd -db nt -entry_batch Nostoc_rbcX_acc.txt \
  | ../Scripts/GetRedundant.pl >Nostoc_rbcX.fa
grep -c ">" Nostoc_rbcX.fa
# recorded: 521

# Rebuild the accession list from the FASTA headers
# (">gi|<number>|<db>|<accession>.<version>|..." -> "<accession>").
# \d+ (rather than a single \d) so versions of 10 and above are rewritten too.
grep ">" Nostoc_rbcX.fa \
  | perl -p -e 's/^>\s*gi\|\d+\|\w+\|(\w+)\.\d+\|.*/$1/' >Nostoc_rbcX_acc.txt
wc -l Nostoc_rbcX_acc.txt
# recorded: 521

../Scripts/GetGB.pl Nostoc_rbcX.gb "$email" <Nostoc_rbcX_acc.txt
grep -c LOCUS Nostoc_rbcX.gb
# recorded: 521

../Scripts/ParseHost.pl Nostoc_rbcX.gb >Nostoc_rbcX_metadata.txt

# --- Get info on missing seqs from studies covered --------------------------
../Scripts/GetGB.pl missing_seqs.gb "$email" <missing_seqs.txt
grep -c "LOCUS" missing_seqs.gb
# recorded: 47

# The original log passed missing_seqs.gb here, but the next command reads
# missing_seqs.fa and missing_seqs.gb still held the GenBank records
# afterwards, so the FASTA output is written to missing_seqs.fa.
# NOTE(review): assumes GetFasta.pl takes the output file as its first
# argument, like GetGB.pl - confirm against the script.
../Scripts/GetFasta.pl missing_seqs.fa "$email" <missing_seqs.txt
grep -c ">" missing_seqs.fa
# recorded: 48

../Scripts/ParseHost.pl missing_seqs.gb >>Nostoc_rbcX_metadata.txt

# --- Add in CP001037 sequence and metadata ----------------------------------
# NOTE(review): the commands below append the missing_seqs files; presumably
# CP001037 is one of the entries in missing_seqs.txt - confirm.
cat missing_seqs.fa >>Nostoc_rbcX.fa
grep -c ">" Nostoc_rbcX.fa
# recorded: 569

cat missing_seqs.gb >>Nostoc_rbcX.gb
grep -c LOCUS Nostoc_rbcX.gb
# recorded: 568

perl ../Scripts/AddDuplicates.pl Nostoc_rbcX_metadata.txt \
  <duplicates.txt >Nostoc_rbcX_metadata2.txt
wc -l Nostoc_rbcX_metadata2.txt
# recorded: 731

# --- Dereplicate -------------------------------------------------------------
# Cluster at identity 1; centroids go to the _nr FASTA, cluster membership
# to the groups file.
usearch -cluster_fast Nostoc_rbcX.fa -id 1 \
  -centroids Nostoc_rbcX_nr.fa -uc Nostoc_rbcX_groups.txt

# NOTE(review): this overwrites Nostoc_rbcX_metadata.txt, while the tree
# annotation below reads Nostoc_rbcX_metadata2.txt - confirm that is intended.
../Scripts/GetGroups.py -g Nostoc_rbcX_groups.txt \
  -m Nostoc_rbcX_metadata2.txt >Nostoc_rbcX_metadata.txt

# Shorten the FASTA headers in place to ">accession" (same pattern as above).
perl -pi -e 's/^>\s*gi\|\d+\|\w+\|(\w+)\.\d+\|.*/>$1/' Nostoc_rbcX_nr.fa

# --- Align and trim ----------------------------------------------------------
mafft Nostoc_rbcX_nr.fa >Nostoc_rbcX_aln.fa

# The original log moved GetExcluded.pl's output to trimal through the macOS
# clipboard (pbcopy / pbpaste); capturing it in a variable passes the same
# text without needing a clipboard.
excluded=$(../Scripts/GetExcluded.pl Nostoc_rbcX_aln.fa)
# Left unquoted on purpose: the original backtick substitution was unquoted
# too, so the text is word-split into trimal arguments in the same way.
# shellcheck disable=SC2086
trimal -in Nostoc_rbcX_aln.fa -phylip -select $excluded >Nostoc_rbcX.phy

# --- Tree building and annotation --------------------------------------------
phyml -i Nostoc_rbcX.phy
../Scripts/AddSig.pl Nostoc_rbcX.phy_phyml_tree.nwk >Nostoc_rbcX.nwk
../Scripts/AddMetadata.pl Nostoc_rbcX_metadata2.txt host \
  <Nostoc_rbcX.nwk >Nostoc_rbcX_host.nwk

# NOTE(review): the first argument has no ".nwk" extension although the
# previous step wrote Nostoc_rbcX_host.nwk - confirm what ColourTree.pl
# expects.
../Scripts/ColourTree.pl Nostoc_rbcX_host family >Nostoc_rbcX_host_col.nwk