Getting started
Introduction
AWK processes text files line by line, splitting each line into fields. It's ideal for data extraction, reporting, and transformation tasks.
Basic Syntax
awk 'pattern { action }' file.txt
# Print entire file
awk '{ print }' file.txt
# Print specific fields
awk '{ print $1, $3 }' file.txt
# With pattern matching
awk '/error/ { print $0 }' log.txt
Quick Examples
# Sum first column
awk '{ sum += $1 } END { print sum }' data.txt
# Print line numbers
awk '{ print NR, $0 }' file.txt
# Filter by condition
awk '$3 > 100 { print $1, $3 }' data.txt
# Change field separator
awk -F: '{ print $1, $7 }' /etc/passwd
Command Line Options
| Option | Description |
|---|---|
| `-F fs` | Set field separator |
| `-v var=val` | Set variable |
| `-f file` | Read program from file |
| `--` | Signal end of options |
# Custom field separator
awk -F, '{ print $1 }' data.csv
# Set variable
awk -v name="John" '$1 == name' people.txt
# Read from script file
awk -f script.awk data.txt
Built-in Variables
Field Variables
| Variable | Description |
|---|---|
| `$0` | Entire line |
| `$1`, `$2`, ... | Field 1, field 2, etc. |
| `$NF` | Last field |
| `$(NF-1)` | Second to last field |
# Print first and last fields
awk '{ print $1, $NF }' file.txt
# Print entire line
awk '{ print $0 }' file.txt
# Print second to last
awk '{ print $(NF-1) }' file.txt
Record Variables
| Variable | Description |
|---|---|
| `NR` | Current record number (across all input files) |
| `NF` | Number of fields in the current record |
| `FNR` | Record number in current file |
| `FILENAME` | Current filename |
# Print line numbers
awk '{ print NR, $0 }' file.txt
# Print number of fields per line
awk '{ print NF }' file.txt
# Print filename and line
awk '{ print FILENAME, FNR, $0 }' *.txt
Separator Variables
| Variable | Description |
|---|---|
| `FS` | Input field separator (default: a single space, which splits on runs of whitespace) |
| `OFS` | Output field separator (default: space) |
| `RS` | Input record separator (default: newline) |
| `ORS` | Output record separator (default: newline) |
# Set field separator to colon
awk 'BEGIN { FS=":" } { print $1 }' /etc/passwd
# Set output separator to comma
awk 'BEGIN { OFS="," } { print $1, $2, $3 }' data.txt
# Multiple character separator
awk 'BEGIN { FS=" : " } { print $1 }' file.txt
Patterns
Pattern Types
# Regular expression
awk '/pattern/ { print }' file.txt
# Comparison
awk '$3 > 100 { print }' data.txt
# Range pattern
awk '/start/,/end/ { print }' file.txt
# Compound pattern
awk '$1 == "error" && $3 > 5 { print }' log.txt
BEGIN and END
# BEGIN: executed before processing
awk 'BEGIN { print "Starting..." }
{ print $0 }
END { print "Done!" }' file.txt
# Initialize variables
awk 'BEGIN { sum=0 }
{ sum+=$1 }
END { print "Total:", sum }' data.txt
# Print header
awk 'BEGIN { print "Name\tScore" }
{ print $1, $2 }' scores.txt
Regular Expressions
# Match pattern
awk '/error|warning/ { print }' log.txt
# Case insensitive (portable: tolower() is POSIX)
awk 'tolower($0) ~ /error/ { print }' log.txt
# Field regex match
awk '$2 ~ /^[0-9]+$/ { print }' file.txt
# Negation
awk '$1 !~ /test/ { print }' file.txt
Comparison Operators
| Operator | Description |
|---|---|
| `==` | Equal |
| `!=` | Not equal |
| `<` | Less than |
| `<=` | Less or equal |
| `>` | Greater than |
| `>=` | Greater or equal |
| `~` | Matches regex |
| `!~` | Does not match regex |
# Numeric comparison
awk '$3 >= 100 { print $1, $3 }' data.txt
# String comparison
awk '$2 == "active" { print }' status.txt
# Regex match
awk '$1 ~ /^A/ { print }' names.txt
Actions & Functions
String Functions
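| Function | Description |
|---|---|
| `length(s)` | String length |
| `substr(s,i,n)` | Substring from position i (1-based), length n |
| `index(s,t)` | Position of t in s (0 if not found) |
| `split(s,a,fs)` | Split s into array a on separator fs; returns the number of elements |
| `tolower(s)` | Convert to lowercase |
| `toupper(s)` | Convert to uppercase |
| `gsub(r,s,t)` | Global substitute: replace every match of r in t with s (t defaults to `$0`) |
| `sub(r,s,t)` | Substitute first match of r in t with s (t defaults to `$0`) |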
# String length
awk '{ print length($1) }' file.txt
# Substring
awk '{ print substr($1, 1, 3) }' file.txt
# Find position
awk '{ print index($0, "error") }' log.txt
# Convert case
awk '{ print toupper($1) }' file.txt
# Replace text
awk '{ gsub(/old/, "new"); print }' file.txt
Numeric Functions
| Function | Description |
|---|---|
| `int(x)` | Truncate to integer |
| `sqrt(x)` | Square root |
| `exp(x)` | Exponential |
| `log(x)` | Natural logarithm |
| `sin(x)` | Sine (x in radians) |
| `cos(x)` | Cosine (x in radians) |
| `atan2(y,x)` | Arctangent of y/x |
| `rand()` | Random number in [0,1) |
| `srand(x)` | Seed random generator |
# Round to nearest integer (this idiom is only correct for non-negative values)
awk '{ print int($1 + 0.5) }' data.txt
# Square root
awk '{ print sqrt($1) }' numbers.txt
# Random numbers
awk 'BEGIN { srand(); print rand() }'
I/O Functions
# Print with newline
awk '{ print $1, $2 }' file.txt
# Print without newline
awk '{ printf "%s ", $1 } END { printf "\n" }' file.txt
# Formatted output
awk '{ printf "%-10s %5d\n", $1, $2 }' data.txt
# Print to file
awk '{ print $1 > "output.txt" }' input.txt
# Append to file
awk '{ print $1 >> "output.txt" }' input.txt
Printf Formatting
| Format | Description |
|---|---|
| `%s` | String |
| `%d` | Integer |
| `%f` | Floating point |
| `%e` | Scientific notation |
| `%x` | Hexadecimal |
| `%o` | Octal |
| `%%` | Literal % |
# Width and precision
awk '{ printf "%10s %5.2f\n", $1, $2 }' data.txt
# Left align with minus
awk '{ printf "%-10s %d\n", $1, $2 }' data.txt
# Zero padding
awk '{ printf "%05d\n", $1 }' numbers.txt
Control Structures
If-Else
# Simple if
awk '{ if ($3 > 100) print $1 }' data.txt
# If-else
awk '{
if ($3 > 100)
print $1, "high"
else
print $1, "low"
}' data.txt
# If-else-if
awk '{
if ($3 > 100)
status="high"
else if ($3 > 50)
status="medium"
else
status="low"
print $1, status
}' data.txt
Loops
# For loop over fields
awk '{
for (i=1; i<=NF; i++)
print $i
}' file.txt
# While loop
awk '{
i=1
while (i<=NF) {
print $i
i++
}
}' file.txt
# Do-while loop
awk '{
i=1
do {
print $i
i++
} while (i<=NF)
}' file.txt
Loop Control
# Break
awk '{
for (i=1; i<=NF; i++) {
if ($i == "stop") break
print $i
}
}' file.txt
# Continue
awk '{
for (i=1; i<=NF; i++) {
if ($i == "") continue
print $i
}
}' file.txt
# Next (skip to next record)
awk '{
if ($1 == "skip") next
print $0
}' file.txt
Ternary Operator
# condition ? true_value : false_value
awk '{ print ($3 > 100 ? "high" : "low") }' data.txt
# Nested ternary
awk '{
print ($3 > 100 ? "high" : $3 > 50 ? "medium" : "low")
}' data.txt
# With assignment
awk '{
status = ($3 > 100 ? "PASS" : "FAIL")
print $1, status
}' scores.txt
Arrays
Associative Arrays
# Create and access
awk '{
count[$1]++
}
END {
for (name in count)
print name, count[name]
}' data.txt
# Multi-dimensional arrays
awk '{
arr[$1,$2] = $3
}
END {
for (key in arr)
print key, arr[key]
}' data.txt
Array Operations
# Check if key exists
awk '{
if ($1 in seen)
print "Duplicate:", $1
seen[$1] = 1
}' file.txt
# Delete element
awk '{
arr[$1] = $2
if ($3 == "remove")
delete arr[$1]
}' data.txt
# Array length (GNU awk)
awk 'END { print length(array) }' file.txt
Array Iteration
# For-in loop
awk '{
count[$1]++
}
END {
for (key in count)
print key, count[key]
}' data.txt
# Sorted iteration (GNU awk)
awk '{
sum[$1] += $2
}
END {
PROCINFO["sorted_in"] = "@ind_str_asc"
for (key in sum)
print key, sum[key]
}' data.txt
Common Array Patterns
# Count occurrences
awk '{ count[$1]++ }
END { for (k in count) print k, count[k] }' file.txt
# Sum by key
awk '{ sum[$1] += $2 }
END { for (k in sum) print k, sum[k] }' data.txt
# Collect unique values
awk '{ unique[$1] = 1 }
END { for (k in unique) print k }' file.txt
# Find duplicates
awk '{
if (seen[$1]++)
print "Duplicate:", $1
}' file.txt
Common One-Liners
Statistics
# Sum column
awk '{ sum += $1 } END { print sum }' data.txt
# Average
awk '{ sum += $1; n++ } END { print sum/n }' data.txt
# Min and max
awk 'NR==1 { min=max=$1 }
{ if ($1<min) min=$1; if ($1>max) max=$1 }
END { print min, max }' data.txt
# Count lines
awk 'END { print NR }' file.txt
# Count non-empty lines
awk 'NF > 0 { count++ } END { print count }' file.txt
Filtering
# Print lines matching pattern
awk '/error/' log.txt
# Print lines NOT matching pattern
awk '!/debug/' log.txt
# Print lines where field matches
awk '$3 > 100' data.txt
# Print specific columns
awk '{ print $1, $3 }' file.txt
# Print lines between patterns
awk '/START/,/END/' file.txt
Transformation
# Swap columns
awk '{ print $2, $1 }' file.txt
# Add line numbers
awk '{ print NR, $0 }' file.txt
# Remove duplicates (keep first)
awk '!seen[$0]++' file.txt
# Replace field separator
awk 'BEGIN { OFS="\t" } { print $1, $2, $3 }' file.txt
# Convert to uppercase
awk '{ print toupper($0) }' file.txt
CSV Processing
# Parse CSV
awk -F, '{ print $1, $3 }' data.csv
# CSV to TSV
awk -F, 'BEGIN { OFS="\t" } { print $1, $2, $3 }' data.csv
# Remove quotes
awk -F, '{ gsub(/"/, ""); print }' data.csv
# Add quotes
awk -F, '{
for (i=1; i<=NF; i++)
printf "\"%s\"%s", $i, (i<NF ? "," : "\n")
}' data.csv
Log Processing
# Count by status code
awk '{ count[$9]++ }
END { for (c in count) print c, count[c] }' access.log
# Filter by date
awk '$4 ~ /06\/Feb\/2026/' access.log
# Sum response sizes
awk '{ sum += $10 } END { print sum }' access.log
# Top IP addresses
awk '{ ip[$1]++ }
END { for (i in ip) print ip[i], i }' access.log |
sort -rn | head -10
Advanced Topics
Multiple Files
# Process multiple files
awk '{ print FILENAME, FNR, $0 }' file1.txt file2.txt
# Different action per file
awk 'FNR==1 { print "File:", FILENAME }
{ print $0 }' *.txt
# Merge files by key
awk 'NR==FNR { a[$1]=$2; next }
{ print $0, a[$1] }' lookup.txt data.txt
User-Defined Functions
# Define function
awk '
function max(a, b) {
return (a > b) ? a : b
}
{
print max($1, $2)
}' data.txt
# Function with local variables
awk '
function factorial(n, i, result) {
result = 1
for (i=2; i<=n; i++)
result *= i
return result
}
{
print factorial($1)
}' numbers.txt
External Commands
# Execute system command
awk '{
system("echo Processing: " $1)
}' file.txt
# Pipe to command
awk '{
print $0 | "sort -r"
}' file.txt
# Get command output
awk 'BEGIN {
"date" | getline today
print "Today is", today
}'
Field Manipulation
# Add fields
awk '{ $(NF+1) = "new" } { print }' file.txt
# Remove fields
awk '{ $3 = ""; print }' file.txt
# Reorder fields
awk '{ temp=$1; $1=$2; $2=temp } { print }' file.txt
# Modify field
awk '{ $2 = toupper($2) } { print }' file.txt
Operators
Arithmetic Operators
| Operator | Description | Example |
|---|---|---|
| `+` | Addition | `$1 + $2` |
| `-` | Subtraction | `$1 - $2` |
| `*` | Multiplication | `$1 * $2` |
| `/` | Division | `$1 / $2` |
| `%` | Modulo | `$1 % $2` |
| `^` | Exponentiation | `$1 ^ 2` |
| `++` | Increment | `count++` |
| `--` | Decrement | `count--` |
# Calculate
awk '{ print $1 + $2 }' data.txt
# Percentage
awk '{ print ($1/$2)*100 "%" }' data.txt
# Compound assignment
awk '{ sum += $1 } END { print sum }' data.txt
Logical Operators
| Operator | Description |
|---|---|
| `&&` | Logical AND |
| `\|\|` | Logical OR |
| `!` | Logical NOT |
# AND condition
awk '$1 > 10 && $2 < 100' data.txt
# OR condition
awk '$1 == "error" || $1 == "warning"' log.txt
# NOT condition
awk '!($1 ~ /test/)' file.txt
Assignment Operators
| Operator | Description |
|---|---|
| `=` | Assign |
| `+=` | Add and assign |
| `-=` | Subtract and assign |
| `*=` | Multiply and assign |
| `/=` | Divide and assign |
| `%=` | Modulo and assign |
| `^=` | Exponent and assign |
# Compound assignment
awk '{ sum += $1; count++ }
END { print sum/count }' data.txt
Gotchas
Common Mistakes
Uninitialized Variables
# Wrong: prints an empty line for the first record (sum is still unset when printed)
awk '{ print sum; sum += $1 }' data.txt
# Correct: initialize in BEGIN
awk 'BEGIN { sum=0 } { sum += $1; print sum }' data.txt
Field Modification Side Effects
# Modifying any field rebuilds $0
awk '{ $2 = toupper($2); print }' file.txt
# This also reformats spacing!
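# To keep the original formatting available, save $0 before modifying any field (print the saved copy when the untouched line is needed)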
awk '{
original = $0
$2 = toupper($2)
print $0
}' file.txt
Floating Point Comparison
# Wrong: floating point comparison
awk '$3 == 0.1' data.txt
# Correct: use threshold
awk 'function abs(x){return x<0?-x:x}
abs($3 - 0.1) < 0.0001' data.txt
Performance Tips
# Slow: an extra getline from the lookup file on every input record
awk '{ getline x < "lookup.txt"; print x, $0 }' data.txt
# Fast: read lookup file once
awk 'NR==FNR { lookup[FNR]=$0; next }
{ print lookup[FNR], $0 }' lookup.txt data.txt
# Avoid unnecessary regex
# Slow
awk '$0 ~ /pattern/' file.txt
# Fast
awk '/pattern/' file.txt
Portability
# Know which features are GNU awk extensions (extensions won't work in a POSIX-only awk)
awk '{ print tolower($0) }' file.txt # tolower is POSIX
awk 'BEGIN { IGNORECASE=1 }' file.txt # IGNORECASE is GNU
# POSIX portable
awk '{ print tolower($0) }' file.txt
# Use explicit path for reproducibility
/usr/bin/awk # system default awk (may be gawk, mawk, or BWK awk depending on the OS)
/usr/bin/gawk # GNU awk
AWK vs Others
AWK vs sed
# AWK: field-oriented
awk '{ print $1, $3 }' file.txt
# sed: line-oriented
sed 's/old/new/g' file.txt
# Use AWK for:
# - Column/field manipulation
# - Calculations
# - Complex conditions
# Use sed for:
# - Simple substitutions
# - Line deletions
# - Stream editing
AWK vs grep
# grep: pattern matching only
grep 'error' log.txt
# AWK: pattern matching + processing
awk '/error/ { count++ } END { print count }' log.txt
# Use grep for:
# - Simple pattern matching
# - Binary file search
# - Recursive search
# Use AWK for:
# - Counting, summing
# - Field extraction
# - Formatted output
AWK vs cut/paste
# cut: fixed fields only
cut -d: -f1,3 /etc/passwd
# AWK: flexible field handling
awk -F: '{ print $1, $3 }' /etc/passwd
# AWK advantages:
# - Multiple delimiters
# - Computed fields
# - Conditional output
Also see
- GNU AWK Manual - Official comprehensive documentation
- POSIX AWK Specification - Standard AWK reference
- AWK Tutorial by Bruce Barnett - Excellent beginner-friendly guide
- AWK One-Liners Explained - Practical examples
- The AWK Programming Language (Book) - Original book by creators