首页 > 其他 > 详细

文本处理之awk进阶

时间:2021-04-24 20:58:31      阅读:25      评论:0      收藏:0      [点我收藏+]

模式PATTERN

PATTERN:根据pattern条件,过滤匹配的行,再做处理
   如果未指定:空模式,匹配每一行

范例:

[root@CentOS-8 ~]# awk -F: '{print $1,$3}' /etc/passwd
root 0
bin 1
daemon 2
adm 3

/regular expression/:仅处理能够模式匹配到的行,需要用/ /括起来

范例

[root@CentOS-8 ~]# awk '/^UUID/{print $1}' /etc/fstab
UUID=ab2f340e-cdb8-4152-95b4-a626bc3323ea
[root@CentOS-8 ~]# awk '!/^UUID/{print $1}' /etc/fstab

#
#
#
#
#
#
#
#
#
#
/dev/mapper/cl-root
/dev/mapper/cl-swap
[15:38:36 root@Bj-Ubuntu ~]# df | awk ‘/^\/dev\/sd/‘
/dev/sda2       25671908 3344824  20999980  14% /
/dev/sda3       15416264   40984  14572464   1% /data

relational expression: 关系表达式,结果为“真”才会被处理

真:结果为非0值,非空字符串

假:结果为空字符串或0值

范例:

[15:39:15 root@Bj-Ubuntu ~]# seq 10 | awk ‘1‘
1
2
3
4
5
6
7
8
9
10
[15:40:13 root@Bj-Ubuntu ~]# seq 10 | awk ‘0‘
[15:40:30 root@Bj-Ubuntu ~]# seq 10 | awk ‘false‘
[15:40:54 root@Bj-Ubuntu ~]# seq 10 | awk ‘"false"‘
1
2
3
4
5
6
7
8
9
10
[15:41:27 root@Bj-Ubuntu ~]# seq 10 | awk ‘""‘
[15:41:35 root@Bj-Ubuntu ~]# seq 10 | awk ‘"0"‘
1
2
3
4
5
6
7
8
9
10
[15:41:40 root@Bj-Ubuntu ~]# seq 10 | awk ‘true‘
[15:41:55 root@Bj-Ubuntu ~]# seq 10 | awk ‘long‘
[15:42:02 root@Bj-Ubuntu ~]# seq 10 | awk ‘0‘
[15:42:06 root@Bj-Ubuntu ~]# seq 10 | awk ‘" "‘
1
2
3
4
5
6
7
8
9
10

范例:

[15:42:58 root@Bj-Ubuntu ~]# awk ‘1‘ /etc/issue
Ubuntu 18.04.4 LTS \n \l

[15:43:23 root@Bj-Ubuntu ~]# awk ‘0‘ /etc/issue
[15:43:39 root@Bj-Ubuntu ~]# awk ‘"long"‘ /etc/issue
Ubuntu 18.04.4 LTS \n \l

范例:

seq 10 | awk 'i=0'
seq 10 | awk 'i=1'
seq 10 | awk 'i=!i'
seq 10 | awk '{i=!i;print i}'
seq 10 | awk '!(i=!i)'
seq 10 | awk -v i=1 'i=!i'

范例:

[15:45:24 root@Bj-Ubuntu ~]# seq 10 | awk -v i=1 ‘i=!i‘
2
4
6
8
10
# 未加引号时,bash 会把 !i 当作历史命令扩展(这里被展开成了 ip a),所以 awk 程序必须用单引号括起来
[15:45:28 root@Bj-Ubuntu ~]# seq 10 | awk i=!i
seq 10 | awk i=ip a
awk: fatal: cannot open file `a' for reading (No such file or directory)
[15:46:03 root@Bj-Ubuntu ~]# seq 10 | awk ‘i=!i‘
1
3
5
7
9
[15:46:30 root@Bj-Ubuntu ~]# seq 10 | awk ‘!(i=!i)‘
2
4
6
8
10
[15:46:40 root@Bj-Ubuntu ~]# seq 10 | awk ‘{i=!i;print i}‘
1
0
1
0
1
0
1
0
1
0

范例:

awk -F: 'i=1;j=1{print i,j}' /etc/passwd
awk -F: '$3>=1000{print $1,$3}' /etc/passwd
awk -F: '$3<1000{print $1,$3}' /etc/passwd
awk -F: '$NF=="/bin/bash"{print $1,$NF}' /etc/passwd
[root@centos8 ~]#awk -F: ‘$NF=="/bin/bash"{print $1,$NF}‘ /etc/passwd
root /bin/bash
wang /bin/bash
long /bin/bash
[root@centos8 ~]#awk -F: ‘$NF ~ /bash$/{print $1,$NF}‘ /etc/passwd
root /bin/bash
wang /bin/bash
long /bin/bash

line ranges:行范围

不支持直接用行号,但可以使用变量NR间接指定行号

/pat1/,/pat2/ 不支持直接给出数字格式

范例:

[15:47:26 root@Bj-Ubuntu ~]# seq 10 | awk NR>=3 && NR<=6
-bash: =6: No such file or directory
[15:51:05 root@Bj-Ubuntu ~]# seq 10 | awk ‘NR>=3 && NR<=6‘
3
4
5
6
[15:51:13 root@Bj-Ubuntu ~]# awk ‘NR>=3 && NR<=6{print NR,$0}‘ /etc/passwd
3 bin:x:2:2:bin:/bin:/usr/sbin/nologin
4 sys:x:3:3:sys:/dev:/usr/sbin/nologin
5 sync:x:4:65534:sync:/bin:/bin/sync
6 games:x:5:60:games:/usr/games:/usr/sbin/nologin
[16:09:11 root@Bj-Ubuntu ~]# awk ‘/^bin/,/^man/‘ /etc/passwd
bin:x:2:2:bin:/bin:/usr/sbin/nologin
sys:x:3:3:sys:/dev:/usr/sbin/nologin
sync:x:4:65534:sync:/bin:/bin/sync
games:x:5:60:games:/usr/games:/usr/sbin/nologin
man:x:6:12:man:/var/cache/man:/usr/sbin/nologin
# 下面这个例子中,结束模式 /^adm/ 匹配不到任何行,所以会从匹配 /^bin/ 的行开始一直输出到文件末尾
[16:07:56 root@Bj-Ubuntu ~]# awk ‘/^bin/,/^adm/‘ /etc/passwd
bin:x:2:2:bin:/bin:/usr/sbin/nologin
sys:x:3:3:sys:/dev:/usr/sbin/nologin
sync:x:4:65534:sync:/bin:/bin/sync
games:x:5:60:games:/usr/games:/usr/sbin/nologin
man:x:6:12:man:/var/cache/man:/usr/sbin/nologin
lp:x:7:7:lp:/var/spool/lpd:/usr/sbin/nologin
mail:x:8:8:mail:/var/mail:/usr/sbin/nologin
news:x:9:9:news:/var/spool/news:/usr/sbin/nologin
uucp:x:10:10:uucp:/var/spool/uucp:/usr/sbin/nologin
proxy:x:13:13:proxy:/bin:/usr/sbin/nologin
www-data:x:33:33:www-data:/var/www:/usr/sbin/nologin
backup:x:34:34:backup:/var/backups:/usr/sbin/nologin
list:x:38:38:Mailing List Manager:/var/list:/usr/sbin/nologin
irc:x:39:39:ircd:/var/run/ircd:/usr/sbin/nologin
gnats:x:41:41:Gnats Bug-Reporting System (admin):/var/lib/gnats:/usr/sbin/nologin
nobody:x:65534:65534:nobody:/nonexistent:/usr/sbin/nologin
systemd-network:x:100:102:systemd Network Management,,,:/run/systemd/netif:/usr/sbin/nologin
systemd-resolve:x:101:103:systemd Resolver,,,:/run/systemd/resolve:/usr/sbin/nologin
syslog:x:102:106::/home/syslog:/usr/sbin/nologin
messagebus:x:103:107::/nonexistent:/usr/sbin/nologin
_apt:x:104:65534::/nonexistent:/usr/sbin/nologin
lxd:x:105:65534::/var/lib/lxd/:/bin/false
uuidd:x:106:110::/run/uuidd:/usr/sbin/nologin
dnsmasq:x:107:65534:dnsmasq,,,:/var/lib/misc:/usr/sbin/nologin
landscape:x:108:112::/var/lib/landscape:/usr/sbin/nologin
pollinate:x:109:1::/var/cache/pollinate:/bin/false
sshd:x:110:65534::/run/sshd:/usr/sbin/nologin
long:x:1000:1000:long:/home/long:/bin/bash
statd:x:111:65534::/var/lib/nfs:/usr/sbin/nologin
www:x:1001:1001::/home/www:/bin/bash

操作符

算术操作符:

x+y, x-y, x*y, x/y, x^y, x%y
-x:转换为负数
+x:将字符串转换为数值

字符串操作符:没有符号的操作符,字符串连接

赋值操作符:

=, +=, -=, *=, /=, %=, ^=, ++, --

范例:

[root@longwang ~]# awk ‘BEGIN{i=0;print i++,i}‘
0 1
[root@longwang ~]# awk ‘BEGIN{i=0;print ++i,i}‘
1 1

范例:

[root@longwang ~]# seq 10 | awk ‘n++‘
2
3
4
5
6
7
8
9
10
[root@longwang ~]# awk -v n=0 ‘!n++‘ /etc/passwd
root:x:0:0:root:/root:/bin/bash
[root@longwang ~]# awk -v n=0 ‘!n++{print n}‘ /etc/passwd
1
[root@longwang ~]# awk -v n=1 ‘!n++{print n}‘ /etc/passwd
[root@longwang ~]# awk -v n=0 ‘!++n{print n}‘ /etc/passwd

[root@longwang ~]# awk -v n=0 ‘!++n‘ /etc/passwd
[root@longwang ~]# awk -v n=-1 ‘!++n‘ /etc/passwd
root:x:0:0:root:/root:/bin/bash

比较操作符:

==, !=, >, >=, <, <=

范例

[root@longwang ~]# awk ‘NR==2‘ /etc/issue
Kernel \r on an \m
[root@longwang ~]# awk -F: ‘$3>=1000‘ /etc/passwd
nobody:x:65534:65534:Kernel Overflow User:/:/sbin/nologin
long:x:1000:1000::/home/long:/bin/bash
longwang:x:1001:1001::/home/longwang:/bin/bash
longe:x:1002:1002::/home/longe:/bin/bash
longee:x:1003:1003::/home/longee:/bin/bash
longgge:x:1004:1004::/home/longgge:/bin/bash
xixi:x:1005:1005::/home/xixi:/bin/bash
haha:x:1006:1006::/home/haha:/bin/bash
edison:x:1007:1007::/home/edison:/bin/bash
www:x:1008:1008::/home/www:/bin/bash

范例:取奇,偶数行

[root@centos8 ~]#seq 10 | awk ‘NR%2==0‘
2
4
6
8
10
[root@centos8 ~]#seq 10 | awk ‘NR%2==1‘
1
3
5
7
9
[root@centos8 ~]#seq 10 | awk ‘NR%2!=0‘
1
3
5
7
9

模式匹配符

~ 左边是否和右边匹配,包含关系
!~ 是否不匹配

范例

[root@longwang ~]# awk -F: ‘$0 ~ /root/{print $1}‘ /etc/passwd
root
operator
[root@longwang ~]# awk -F: ‘$0 ~ /^root/{print $1}‘ /etc/passwd
root
[root@longwang ~]# awk -F: ‘$0 !~ /root‘ /etc/passwd
awk: cmd. line:1: $0 !~ /root
awk: cmd. line:1:        ^ unterminated regexp
[root@longwang ~]# 
[root@longwang ~]# awk -F: ‘$0 !~ /root/‘ /etc/passwd
bin:x:1:1:bin:/bin:/sbin/nologin
daemon:x:2:2:daemon:/sbin:/sbin/nologin
adm:x:3:4:adm:/var/adm:/sbin/nologin
[root@longwang ~]# awk ‘/root/‘ /etc/passwd
root:x:0:0:root:/root:/bin/bash
operator:x:11:0:operator:/root:/sbin/nologin
[root@longwang ~]# awk -F: ‘/r/‘ /etc/passwd
root:x:0:0:root:/root:/bin/bash
adm:x:3:4:adm:/var/adm:/sbin/nologin
lp:x:4:7:lp:/var/spool/lpd:/sbin/nologin
mail:x:8:12:mail:/var/spool/mail:/sbin/nologin
operator:x:11:0:operator:/root:/sbin/nologin
games:x:12:100:games:/usr/games:/sbin/nologin
ftp:x:14:50:FTP User:/var/ftp:/sbin/nologin
nobody:x:65534:65534:Kernel Overflow User:/:/sbin/nologin
systemd-coredump:x:999:997:systemd Core Dumper:/:/sbin/nologin
systemd-resolve:x:193:193:systemd Resolver:/:/sbin/nologin
tss:x:59:59:Account used by the trousers package to sandbox the tcsd daemon:/dev/null:/sbin/nologin
polkitd:x:998:996:User for polkitd:/:/sbin/nologin
unbound:x:997:995:Unbound DNS resolver:/etc/unbound:/sbin/nologin
sssd:x:996:993:User for sssd:/:/sbin/nologin
sshd:x:74:74:Privilege-separated SSH:/var/empty/sshd:/sbin/nologin
saslauth:x:995:76:Saslauthd user:/run/saslauthd:/sbin/nologin
[root@longwang ~]# 
[root@longwang ~]# awk -F: ‘$3==0‘ /etc/passwd
root:x:0:0:root:/root:/bin/bash
[16:26:53 root@Bj-Ubuntu ~]# ifconfig eth0 | awk ‘NR==2{print $2}‘
172.31.0.19

逻辑操作符

与:&&,并且关系
或:||,或者关系
非:!,取反

范例:!取反

[16:27:15 root@Bj-Ubuntu ~]# awk ‘BEGIN{print i}‘

[16:28:33 root@Bj-Ubuntu ~]# awk ‘BEGIN{print !i}‘
1
[16:28:42 root@Bj-Ubuntu ~]# awk -v i=10 ‘BEGIN{print !i}‘
0
[16:29:12 root@Bj-Ubuntu ~]# awk -v i=3 ‘BEGIN{print !i}‘
0
[16:29:22 root@Bj-Ubuntu ~]# awk -v i=0 ‘BEGIN{print !i}‘
1
[16:29:26 root@Bj-Ubuntu ~]# awk -v i=abc ‘BEGIN{print !i}‘
0
[16:29:32 root@Bj-Ubuntu ~]# awk -v i='' 'BEGIN{print !i}'
1

范例:

awk -F: '$3>=0 && $3<=1000 {print $1,$3}' /etc/passwd
awk -F: '$3==0 || $3>=1000 {print $1,$3}' /etc/passwd
awk -F: '!($3==0) {print $1,$3}' /etc/passwd
awk -F: '!($3>=500) {print $1,$3}' /etc/passwd

条件表达式(三目表达式)

selector?if-true-expression:if-false-expression

范例

awk -F: '{$3>=1000?usertype="Common User":usertype="SysUser";printf "%-20s:%12s\n",$1,usertype}' /etc/passwd
[root@centos8 ~]#df | awk -F"[ %]+" '/^\/dev\/sd/{$(NF-1)>10?disk="full":disk="OK";print $(NF-1),disk}'
3 OK
1 OK
13 full

条件判断 if-else

语法

if(condition){statement;…}[else statement]
if(condition1){statement1}else if(condition2){statement2}else if(condition3){statement3}...... else {statementN}

使用场景:对awk取得的整行或某个字段做条件判断

[16:29:40 root@Bj-Ubuntu ~]# awk -F: ‘{if($3>=1000)print $1,$3}‘ /etc/passwd
nobody 65534
long 1000
www 1001
[root@longwang ~]# awk -F: ‘{if($3<=100){print "<=100",$3}else if ($3<=1000){print "<=1000",$3}else{print ">=1000",$3}}‘ /etc/passwd
<=100 0
<=100 1
<=100 2
<=100 3
<=100 4
<=100 5
<=100 6
<=100 7
<=100 8
<=100 11
<=100 12
<=100 14
>=1000 65534
<=100 81
<=1000 999
<=1000 193
<=100 59
<=1000 998
<=1000 997
<=1000 996
<=100 74
<=1000 995
<=100 50
<=1000 1000
>=1000 1001
>=1000 1002
>=1000 1003
>=1000 1004
>=1000 1005
>=1000 1006
>=1000 1007
>=1000 1008
# 注意:这里写的是 "bin/bash",缺少开头的 /,与 $NF 的实际值 /bin/bash 不相等,因此没有任何输出
[root@longwang ~]# awk -F: '{if($NF=="bin/bash")print $1}' /etc/passwd
[root@longwang ~]# awk ‘{if(NF>5)print $0}‘ /etc/fstab 
# Created by anaconda on Fri Sep  4 21:10:02 2020
# Accessible filesystems, by reference, are maintained under ‘/dev/disk/‘.
# See man pages fstab(5), findfs(8), mount(8) and/or blkid(8) for more info.
# After editing this file, run ‘systemctl daemon-reload‘ to update systemd
# units generated from this file.
/dev/mapper/cl-root     /                       xfs     defaults        0 0
UUID=232316f3-6dff-487a-9fae-af211542097b /boot                   ext4    defaults        1 2
/dev/mapper/cl-data     /data                   xfs     defaults        0 0
/dev/mapper/cl-swap     swap                    swap    defaults        0 0
[root@longwang ~]# awk -F: ‘{if($3>=1000){printf "common user: %s\n",$1} else {printf "root or Sysuser: %s\n",$1}}‘ /etc/passwd
root or Sysuser: root
root or Sysuser: bin
root or Sysuser: daemon
root or Sysuser: adm
root or Sysuser: lp
root or Sysuser: sync
root or Sysuser: shutdown
root or Sysuser: halt
root or Sysuser: mail
root or Sysuser: operator
root or Sysuser: games
root or Sysuser: ftp
common user: nobody
root or Sysuser: dbus
root or Sysuser: systemd-coredump
root or Sysuser: systemd-resolve
root or Sysuser: tss
root or Sysuser: polkitd
root or Sysuser: unbound
root or Sysuser: sssd
root or Sysuser: sshd
root or Sysuser: saslauth
root or Sysuser: apache
common user: long
common user: longwang
common user: longe
common user: longee
common user: longgge
common user: xixi
common user: haha
common user: edison
common user: www
[16:39:59 root@Bj-Ubuntu ~]# df -h | awk -F% ‘/^\/dev\/sd/{print $1}‘
/dev/sda2        25G  3.2G   21G  14
/dev/sda3        15G   41M   14G   1
[16:40:06 root@Bj-Ubuntu ~]# df -h | awk -F% ‘/^\/dev\/sd/{print $1}‘ | awk ‘$NF>=80{print $1,$5}‘
[16:40:36 root@Bj-Ubuntu ~]# df | awk -F‘ +|%‘ ‘/^\/dev\/sd/{if($5>=10)print $1,$5}‘
/dev/sda2 14
[16:42:29 root@Bj-Ubuntu ~]# df | awk -F‘[ %]+‘ ‘/^\/dev\/sd/{if($5>=10)print $1,$5}‘
/dev/sda2 14
[root@centos8 ~]#df | awk -F"[ %]+" '/^\/dev\/sd/{if($(NF-1)>10)print $(NF-1)" full";else {print $(NF-1)" OK"}}'
3 OK
1 OK
13 full
[root@centos8 ~]#df | awk -F"[ %]+" '/^\/dev\/sd/{if($(NF-1)>10){print $(NF-1)" full"}else {print $(NF-1)" OK"}}'
3 OK
1 OK
13 full

条件判断 switch

语法

switch(expression) {case VALUE1 or /REGEXP/: statement1; case VALUE2 or /REGEXP2/: statement2; ...; default: statementn}

循环 while

语法

while (condition) {statement;…}

条件“真”,进入循环;条件“假”,退出循环

使用场景:

对一行内的多个字段逐一类似处理时使用

对数组中的各元素逐一处理时使用

[16:44:01 root@Bj-Ubuntu ~]# awk -v i=1 -v sum=0 ‘BEGIN{while(i<=100){sum+=i;i++};print sum}‘ 
5050
#内置函数length()返回字符数,而非字节数
[root@centos8 ~]#awk ‘BEGIN{print length("hello")}‘
5
[root@centos8 ~]#awk ‘BEGIN{print length("小鸡蘑菇")}‘
4
[root@centos8 ~]#awk 'BEGIN{ total=0;i=1;while(i<=100){total+=i;i++};print total}'
5050

循环 do-while

语法

do {statement;…}while(condition)

意义:无论真假,至少执行一次循环体

do-while循环
语法:do {statement;…}while(condition)

意义:无论真假,至少执行一次循环体

[root@centos8 ~]#awk 'BEGIN{ total=0;i=1;do{ total+=i;i++;}while(i<=100);print total}'
5050

循环 for

语法

for(expr1;expr2;expr3) {statement;…}

常见用法:

for(variable assignment;condition;iteration process) {for-body}

特殊用法:能够遍历数组中的元素

for(var in array) {for-body}

范例:

[16:47:39 root@Bj-Ubuntu ~]# awk ‘BEGIN{sum=0;for(i=1;i<=100;i++){sum+=i};print sum}‘
5050

范例:

[16:54:00 root@Bj-Ubuntu ~]# awk ‘BEGIN{total=0;for(i=1;i<=100;i++){total+=i}print total}‘
5050

性能比较

[16:55:13 root@Bj-Ubuntu ~]# time (awk 'BEGIN{total=0;for(i=0;i<=1000000;i++){total+=i}print total}')
[17:00:40 root@Bj-Ubuntu ~]# time(total=0;for i in {1..1000000};do total=$(($total+i));done ;echo $total)
[17:01:00 root@Bj-Ubuntu ~]# time(for((i=0;i<=1000000;i++));do let total+=i;done ;echo $total)
[17:02:00 root@Bj-Ubuntu ~]# time(seq -s "+" 1000000 | bc)

范例: 取出字符串中的数字

[17:02:00 root@Bj-Ubuntu ~]# echo 'dsFUs34tg*fs5a%8ar%$#@' |awk -F "" '
{
    for(i=1;i<=NF;i++)
    {
      if ($i ~ /[0-9]/)
      {
        str=(str $i)
      }
    }
    print str
}'

continue 和 break

continue 中断本次循环

break 中断整个循环

格式

continue
break

注意:awk 的 continue 和 break 都不带参数(不支持 bash 那样的 [n] 层数写法),只作用于所在的最内层循环

范例

[root@longwang ~]# awk ‘BEGIN{for(i=1;i<=100;i++){if(i==50)continue;sum+=i}print sum}‘
5000
[root@longwang ~]# awk ‘BEGIN{for(i=1;i<=100;i++){if(i==50)break;sum+=i}print sum}‘
1225

[root@longwang ~]# awk ‘BEGIN{sum=0;for(i=1;i<=100;i++){if(i%2==0)continue;sum+=i}print sum}‘ 
2500
[root@longwang ~]# awk ‘BEGIN{sum=0;for(i=1;i<=100;i++){if(i==50)break;sum+=i}print sum}‘
1225

next

next 可以提前结束对本行处理而直接进入下一行处理(awk自身循环)

范例:

[root@centos8 ~]#awk -F: ‘{if($3%2!=0) next; print $1,$3}‘ /etc/passwd
root 0
daemon 2
lp 4
shutdown 6
mail 8
games 12
ftp 14

文本处理之awk进阶

原文:https://www.cnblogs.com/xuanlv-0413/p/14697459.html

(0)
(0)
   
举报
评论 一句话评论(0)
关于我们 - 联系我们 - 留言反馈 - 联系我们:wmxa8@hotmail.com
© 2014 bubuko.com 版权所有
打开技术之扣,分享程序人生!