awk & sed & perl 单行操作
# Keep lines whose 5th whitespace-separated field is exactly "abc123".
# In perl the string operators eq/ne are required: == and != compare
# numerically, and every non-numeric string evaluates to 0, so
# `$F[4] == "abc123"` is true for any non-numeric field.
awk '$5 == "abc123"' file.txt
perl -alne 'print if $F[4] eq "abc123"' file.txt

# Keep lines whose 5th field is anything other than "abc123".
awk '$5 != "abc123"' file.txt
perl -alne 'print if $F[4] ne "abc123"' file.txt

# Same equality filter on comma-separated input.
awk -F, '$5 == "abc123"' file.txt
perl -F, -alne 'print if $F[4] eq "abc123"' file.txt
# Keep lines whose 7th field does NOT start with a lowercase letter a-f.
# Both commands read file.txt; without a file operand perl would wait on stdin.
awk '$7 !~ /^[a-f]/' file.txt
perl -alne 'print if $F[6] !~ /^[a-f]/' file.txt
# Keep lines where column 3 is greater than column 5.
# Both commands read file.txt; without a file operand perl would wait on stdin.
# NOTE: perl's > is always numeric, while awk falls back to a string
# comparison when either field does not look like a number.
awk '$3 > $5' file.txt
perl -alne 'print if $F[2] > $F[4]' file.txt
# Sum of column 1. Adding 0 makes empty input print 0 instead of a blank line.
awk '{ sum += $1 } END { print sum + 0 }' file.txt
# -l already appends a newline to every print, so no explicit "\n" is needed.
perl -alne '$sum += $F[0]; END { print 0 + $sum }' file.txt

# Mean of column 1: divide the accumulated sum (not an unset variable) by the
# number of records; the NR guard avoids a division by zero on empty input.
awk '{ sum += $1 } END { if (NR) print sum / NR }' file.txt
# Strip leading and trailing whitespace from every line.
# [[:space:]] is the POSIX whitespace class. Inside a bracket expression "\s"
# is not a class: [\s\t] matches a literal backslash or "s" (and "\t" is only
# turned into a tab by GNU sed), so spaces were never removed.
sed 's/^[[:space:]]*//; s/[[:space:]]*$//' file.txt

# Delete empty lines. The address is /^$/ followed by the d command;
# '/^$//d' is a syntax error.
sed '/^$/d' file.txt
find, xargs, sort, uniq 等
# Count the tab-separated columns in the first line of runinfo.txt:
# put each field of the header on its own line, then count the lines.
head -n 1 runinfo.txt | tr '\t' '\n' | wc -l
# Print 10 lines of file.txt chosen at random, in random order.
shuf -n 10 file.txt
# Frequency table of the values in column 2 (tab-separated),
# highest count first.
cut -f2 file.txt \
  | sort \
  | uniq -c \
  | sort -k1 -n -r
将所有.txt 文件修改为.bak(例如在对*.txt做操作之前用于文件备份)
# Rename every regular *.txt file below the current directory to *.bak.
# find passes the paths to the inner shell as arguments, so names containing
# spaces, quotes or shell metacharacters are renamed safely; the original
# generated "mv ..." text and piped it to sh, which re-parsed the file names.
# -type f restricts the rename to files, so directories that happen to end in
# .txt are not moved while find is still walking them.
find . -type f -name '*.txt' -exec sh -c '
  for path in "$@"; do
    mv -- "$path" "${path%.txt}.bak"
  done
' sh {} +
通过parallel并行运行12个fastqc任务
# Run FastQC on every file under 0_raw_data, 12 jobs at a time, writing the
# reports to the current directory.
# -type f is needed because a bare `find DIR` also prints DIR itself (and any
# sub-directories), which would each be handed to fastqc as an input.
find 0_raw_data -type f | parallel -j 12 "fastqc {} --outdir ."
直接定义一个extract函数,直接extract解压缩(修改.bashrc)
#######################################
# Unpack an archive into the current directory, choosing the tool from the
# file-name suffix. Meant to be defined in ~/.bashrc.
# Arguments: $1 - path to the archive
# Outputs:   the extraction tool's own output; error messages go to stderr
# Returns:   the extraction tool's exit status, or 1 when $1 is not a regular
#            file or its suffix is not recognised
#######################################
extract() {
  # "$1" is quoted everywhere so paths containing spaces or glob characters
  # reach the tools as a single argument.
  if [ ! -f "$1" ]; then
    echo "'$1' is not a valid file!" >&2
    return 1
  fi

  # Compound suffixes (*.tar.bz2, *.tar.gz, *.tar.xz) must be listed before
  # the plain *.bz2 / *.gz patterns, because the first matching arm wins.
  case "$1" in
    *.tar.bz2) tar xvjf "$1" ;;
    *.tar.gz)  tar xvzf "$1" ;;
    *.tar.xz)  tar Jxvf "$1" ;;
    *.bz2)     bunzip2 "$1" ;;
    *.rar)     unrar x "$1" ;;
    *.gz)      gunzip "$1" ;;
    *.tar)     tar xvf "$1" ;;
    *.tbz2)    tar xvjf "$1" ;;
    *.tgz)     tar xvzf "$1" ;;
    *.zip)     unzip "$1" ;;
    *.Z)       uncompress "$1" ;;
    *.7z)      7z x "$1" ;;
    *)
      echo "don't know how to extract '$1'..." >&2
      return 1
      ;;
  esac
}
# Reverse-complement a DNA sequence: reverse the string, then swap A<->T and
# C<->G. Characters outside ACTG (such as N) pass through unchanged.
rev <<< 'ATTGCTATGCTNNNT' | tr 'ACTG' 'TGAC'
# Page through file.txt with its columns aligned; less -S chops long lines
# instead of wrapping them.
column -t file.txt | less -S
# Alternative pretty-printer; csvtk has to be installed first.
csvtk pretty
对samtools mpileup并行化call SNP
BAM="yourFile.bam"
REF="reference.fasta"

# Call SNPs per reference sequence in parallel (up to 24 at once):
#   1. read the BAM header and keep the @SQ lines,
#   2. cut each line down to the sequence name that follows "SN:",
#   3. start one mpileup | bcftools pipeline per name, writing <name>.vcf.
# The name, reference and BAM path are passed to the inner shell as positional
# arguments ($1, $2, $3) instead of being spliced into the command string, so
# names or paths with spaces or shell metacharacters cannot alter the command.
# -n 1 is dropped because it conflicts with -I, which already runs one command
# per input line.
samtools view -H "$BAM" \
  | grep '^@SQ' \
  | sed 's/^.*SN://' \
  | cut -f 1 \
  | xargs -P 24 -I {} sh -c '
      samtools mpileup -BQ0 -d 100000 -uf "$2" -r "$1" "$3" \
        | bcftools call -cv > "$1.vcf"
    ' sh {} "$REF" "$BAM"
对多个tsv/csv表按表头(列名columns)对齐后按行(rows)拼接。类似于dplyr中的bind_rows()(按行拼接,而不是left_join()那样按键连接)
# csvtk concat names.csv names.reorder.csv | csvtk pretty
# csvtk concat names.csv names.with-unmatched-colname.csv -i -u NA | csvtk pretty
参考资料