The easiest way to process fastq files from 16S Illumina data is to use Qiime. Follow the link for a tutorial:
http://qiime.org/tutorials/tutorial.html
This is installed on the darwin cluster (beagle) through the command:
module add qiime-default
Only the dependencies required for the default Qiime pipeline are loaded.
#!/bin/bash
#$ -S /bin/bash
# -cwd
# NOTE(review): the "-cwd" line above has no "$", so the scheduler treats it
# as a plain comment. Change it to "#$ -cwd" if the job should run in the
# submission directory -- confirm the intent before enabling.
#
# Process a 16S Illumina fastq file: build a barcode read file, split the
# library with QIIME, trim with mothur, restore the QIIME read names,
# truncate to 77 bases, pick OTUs against the Greengenes reference with
# uclust_ref and build an OTU table.
#
# Fill in SOLFILEF, MAPFILE, OLIGO and UNIQUE below before submitting.
# All intermediate and final files are written under "${UNIQUE}_output/";
# the barcode file is written to "${UNIQUE}_bar.txt".

source /etc/profile.d/modules.sh
module load qiime-default

# Every step consumes the previous step's output, so stop at the first failure.
# Enabled only after the module setup, which is not written for strict mode.
set -e

#define your file paths
#where is the fastq file you want to process
SOLFILEF=
#Where is the associated mapping file
MAPFILE=
#the oligo file
OLIGO=
#where the programs are called from
BIN=/data/spacocha/bin
#where you want to put the output of the analysis
UNIQUE=

# Refuse to run with unfilled placeholders; otherwise the commands below
# would silently read/write paths such as "_output/seqs.fna".
: "${SOLFILEF:?set SOLFILEF to the fastq file to process}"
: "${MAPFILE:?set MAPFILE to the mapping file}"
: "${OLIGO:?set OLIGO to the oligo file}"
: "${BIN:?set BIN to the directory holding the helper programs}"
: "${UNIQUE:?set UNIQUE to the output prefix}"

# Fixed locations on the cluster.
QIIME_BIN=/home/software/python/python-27/software/qiime/qiime-1.3.0/bin
GG_DIR=/data/spacocha/Qiime_dir/gg_otus_4feb2011
OUTDIR="${UNIQUE}_output"

#Make the fake barcode read
perl "${BIN}/fastq2Qiime_barcode.pl" "${SOLFILEF}" > "${UNIQUE}_bar.txt"

#Split your fastq file with qiime
split_libraries_fastq.py -i "${SOLFILEF}" -b "${UNIQUE}_bar.txt" \
  -o "${OUTDIR}" -m "${MAPFILE}" \
  --barcode_type 7 --min_per_read_length 77 --last_bad_quality_char E \
  -r 0 --max_barcode_errors 0

#Trim the sequences with mothur
"${BIN}/mothur/Mothur.source/mothur" \
  "#trim.seqs(fasta=${OUTDIR}/seqs.fna, oligos=${OLIGO})"

#revert the names from mothur to qiime names
perl "${BIN}/revert_names_mothur.pl" "${OUTDIR}/seqs.fna" \
  "${OUTDIR}/seqs.trim.fasta" > "${OUTDIR}/seqs.trim.names.fasta"

# Truncate the renamed sequences (argument 77, matching the minimum read
# length used when splitting the library).
perl "${BIN}/truncate_fasta.pl" "${OUTDIR}/seqs.trim.names.fasta" 77 \
  > "${OUTDIR}/seqs.trim.names.77.fasta"

#Now pick otus from the qiime greengenes ref
python "${QIIME_BIN}/pick_otus.py" -i "${OUTDIR}/seqs.trim.names.77.fasta" \
  -o "${OUTDIR}/ucrC/" \
  -r "${GG_DIR}/rep_set/gg_97_otus_4feb2011.fasta" -C -m uclust_ref

#make an otu table
# pick_otus.py names its OTU map after the input file's basename, so the map
# for seqs.trim.names.77.fasta is seqs.trim.names.77_otus.txt.
python "${QIIME_BIN}/make_otu_table.py" \
  -i "${OUTDIR}/ucrC/seqs.trim.names.77_otus.txt" \
  -t "${GG_DIR}/taxonomies/greengenes_tax.txt" \
  -o "${OUTDIR}/ucrC/seqs_otus.mat"