Introduction to NumPy
NumPy (Numerical Python) is the foundational package for scientific computing in Python. It provides support for large, multi-dimensional arrays and matrices, along with a collection of mathematical functions to operate on these arrays efficiently.
Why Use NumPy?
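- Performance: Vectorized operations on NumPy arrays can be up to 50x faster than equivalent loops over Python lists
- Memory Efficient: Stores elements of a single data type in one contiguous block, so it uses less memory than Python lists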
- Convenient: Extensive collection of mathematical functions
- Foundation: Used by pandas, scikit-learn, TensorFlow, and more
Installation
Install NumPy using pip:
pip install numpy
Import NumPy in your Python code:
import numpy as np
Creating NumPy Arrays
From Python Lists
# 1D array
arr1d = np.array([1, 2, 3, 4, 5])
print(arr1d) # [1 2 3 4 5]
# 2D array
arr2d = np.array([[1, 2, 3], [4, 5, 6]])
print(arr2d)
# [[1 2 3]
# [4 5 6]]
Common Array Creation Functions
# Array of zeros
zeros = np.zeros((3, 4)) # 3x4 array of zeros
# Array of ones
ones = np.ones((2, 3)) # 2x3 array of ones
# Array with a range of values
range_arr = np.arange(0, 10, 2) # [0 2 4 6 8]
# Array with evenly spaced values
linspace_arr = np.linspace(0, 1, 5) # 5 evenly spaced values from 0 to 1, endpoint included: 0, 0.25, 0.5, 0.75, 1
# Identity matrix
identity = np.eye(3) # 3x3 identity matrix
# Random arrays
random_arr = np.random.rand(3, 3) # 3x3 random values [0,1)
random_int = np.random.randint(0, 10, (3, 3)) # 3x3 random integers in [0, 10) - upper bound excluded
Array Attributes
arr = np.array([[1, 2, 3], [4, 5, 6]])
print(arr.shape) # (2, 3) - dimensions
print(arr.ndim) # 2 - number of dimensions
print(arr.size) # 6 - total elements
print(arr.dtype) # int64 - data type (default integer type is platform dependent; may be int32 on some systems)
Array Operations
Arithmetic Operations
a = np.array([1, 2, 3])
b = np.array([4, 5, 6])
print(a + b) # [5 7 9] - addition
print(a - b) # [-3 -3 -3] - subtraction
print(a * b) # [ 4 10 18] - element-wise multiplication (not matrix multiplication)
print(a / b) # [0.25 0.4 0.5] - division
print(a ** 2) # [1 4 9] - exponentiation
Broadcasting
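Broadcasting allows NumPy to perform element-wise operations on arrays of different but compatible shapes: the smaller operand (for example, a scalar) is treated as if it were stretched to match the shape of the larger one, without actually copying data.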
arr = np.array([[1, 2, 3], [4, 5, 6]])
# Add scalar to array
print(arr + 10)
# [[11 12 13]
# [14 15 16]]
# Multiply array by scalar
print(arr * 2)
# [[2 4 6]
# [8 10 12]]
Universal Functions (ufuncs)
arr = np.array([1, 4, 9, 16])
print(np.sqrt(arr)) # [1. 2. 3. 4.]
print(np.exp(arr)) # exponential
print(np.log(arr)) # natural logarithm
print(np.sin(arr)) # sine
print(np.cos(arr)) # cosine
Indexing and Slicing
Basic Indexing
arr = np.array([10, 20, 30, 40, 50])
print(arr[0]) # 10 - first element
print(arr[-1]) # 50 - last element
print(arr[1:4]) # [20 30 40] - slice
print(arr[::2]) # [10 30 50] - every 2nd element
2D Array Indexing
arr2d = np.array([[1, 2, 3],
[4, 5, 6],
[7, 8, 9]])
print(arr2d[0, 0]) # 1 - element at row 0, col 0
print(arr2d[1]) # [4 5 6] - entire row 1
print(arr2d[:, 1]) # [2 5 8] - entire column 1
print(arr2d[:2, :2]) # [[1 2] [4 5]] - subarray
Boolean Indexing
arr = np.array([1, 2, 3, 4, 5])
# Get elements greater than 3
mask = arr > 3
print(arr[mask]) # [4 5]
# Shorter version
print(arr[arr > 3]) # [4 5]
# Multiple conditions
print(arr[(arr > 2) & (arr < 5)]) # [3 4]
Reshaping and Combining Arrays
Reshaping
arr = np.arange(12) # [0 1 2 ... 11]
# Reshape to 3x4
reshaped = arr.reshape(3, 4)
# Flatten to 1D (flatten always returns a copy)
flattened = reshaped.flatten()
# Transpose
transposed = reshaped.T
Combining Arrays
a = np.array([[1, 2], [3, 4]])
b = np.array([[5, 6], [7, 8]])
# Vertical stack (row-wise)
v_stack = np.vstack((a, b))
# [[1 2]
# [3 4]
# [5 6]
# [7 8]]
# Horizontal stack (column-wise)
h_stack = np.hstack((a, b))
# [[1 2 5 6]
# [3 4 7 8]]
# Concatenate
concat = np.concatenate((a, b), axis=0) # same as vstack
Mathematical Functions
Aggregate Functions
arr = np.array([[1, 2, 3], [4, 5, 6]])
print(arr.sum()) # 21 - sum of all elements
print(arr.sum(axis=0)) # [5 7 9] - sum of each column (axis 0 collapses the rows)
print(arr.sum(axis=1)) # [ 6 15] - sum of each row (axis 1 collapses the columns)
print(arr.mean()) # 3.5 - mean
print(arr.std()) # standard deviation
print(arr.min()) # 1 - minimum
print(arr.max()) # 6 - maximum
print(arr.argmin()) # 0 - index of minimum in the flattened array
print(arr.argmax()) # 5 - index of maximum in the flattened array
Linear Algebra
a = np.array([[1, 2], [3, 4]])
b = np.array([[5, 6], [7, 8]])
# Matrix multiplication
mat_mult = np.dot(a, b) # or a @ b
# Determinant
det = np.linalg.det(a)
# Inverse
inv = np.linalg.inv(a)
# Eigenvalues and eigenvectors
eigenvalues, eigenvectors = np.linalg.eig(a)
Advanced Topics
Copying Arrays
Important: Assigning an array to another name creates a second reference to the same data, not a copy! Changes made through either name affect both.
arr = np.array([1, 2, 3])
# Reference (not a copy!)
ref = arr
ref[0] = 99
print(arr) # [99 2 3] - original is changed!
# Create a copy
arr = np.array([1, 2, 3])
copy = arr.copy()
copy[0] = 99
print(arr) # [1 2 3] - original unchanged
print(copy) # [99 2 3]
Sorting
arr = np.array([3, 1, 4, 1, 5])
# Sort array
sorted_arr = np.sort(arr) # [1 1 3 4 5]
# Get sorted indices
indices = np.argsort(arr) # [1 3 0 2 4]
# Sort 2D array
arr2d = np.array([[3, 1], [2, 4]])
sorted_2d = np.sort(arr2d, axis=1) # sort each row
Unique Values
arr = np.array([1, 2, 2, 3, 3, 3, 4])
# Get unique values
unique = np.unique(arr) # [1 2 3 4]
# Get counts
unique, counts = np.unique(arr, return_counts=True)
# unique: [1 2 3 4]
# counts: [1 2 3 1]
Practical Example: Data Analysis
Analyzing Student Scores
# Student scores in 3 subjects
scores = np.array([
[85, 90, 78], # Student 1
[92, 88, 95], # Student 2
[75, 82, 80], # Student 3
[88, 86, 90] # Student 4
])
# Average score per student
student_avg = scores.mean(axis=1)
print("Student averages:", student_avg)
# Average score per subject
subject_avg = scores.mean(axis=0)
print("Subject averages:", subject_avg)
# Find best performing student
best_student = student_avg.argmax()
print(f"Best student: {best_student + 1}")
# Boolean mask: True for each student scoring at least 85 in all subjects
all_above_85 = np.all(scores >= 85, axis=1)
print("Students with all scores ≥85:", all_above_85)