首页 > 编程语言 > 详细

多线程拆分bam文件

时间:2015-06-01 16:42:55      阅读:374      评论:0      收藏:0      [点我收藏+]
#!perl
use warnings;
#use strict;
use threads;
use Thread::Semaphore;
use File::Basename qw(basename);

die "perl $0 <bam> <threads>\n" if @ARGV != 2;

my $semaphore = Thread::Semaphore->new($ARGV[1]);
my $id = basename($ARGV[0], ".bam");
if(-s "$ARGV[0].bai")
{
	
}else{
	`samtools index $ARGV[0]`;
}
my $outdir = "${id}_split";
mkdir $outdir;

my (%hash, $hd, $rg, $pg);
open HEAD, "samtools view -H $ARGV[0] |" or die $!;
while(<HEAD>)
{
	if(/^\@SQ/)
	{
		my ($chr) = $_ =~ /SN:(\S+)/;
		$hash{$chr} = $_;
		next;
	}
	if(/^\@HD/)
	{
		$hd .= "$_";
		next;
	}
	if(/^\@RG/)
	{
		$rg .= "$_";
		next;
	}
	if(/^\@PG/)
	{
		$pg .= "$_";
		next;
	}
}

foreach(keys %hash)
{
	$semaphore->down();
	my $thread = threads->create(\&splitchr, $_);
	$thread->detach();
}

&waitDone;

sub waitDone{
	my $num = 0;
	while($num < $ARGV[1])
	{
		$semaphore->down();
		$num ++;
	}
}

sub splitchr{
	my $chr = shift;
	open $chr, "> $outdir/$id.$chr.sam" or die $!;
	print $chr "$hd$hash{$chr}$rg$pg";
	my $content = `samtools view $ARGV[0] $chr`;
	print $chr "$content";
	close $chr;
	`samtools view -bS $outdir/$id.$chr.sam > $outdir/$id.$chr.bam`;
	`rm $outdir/$id.$chr.sam -rf`;
}

仅仅适合内存较大的集群~

多线程拆分bam文件

原文:http://blog.csdn.net/skenoy/article/details/46311623

(0)
(0)
   
举报
评论 一句话评论(0
关于我们 - 联系我们 - 留言反馈 - 联系我们:wmxa8@hotmail.com
© 2014 bubuko.com 版权所有
打开技术之扣,分享程序人生!