numpy基础

Numpy 简介

NumPy是一个Python包。它代表“Numeric Python”。它是一个由多维数组对象和用于处理数组的例程集合组成的库。

Numeric,即 NumPy 的前身,是由 Jim Hugunin 开发的。也开发了另一个包Numarray,它拥有一些额外的功能。2005年,Travis Oliphant通过将 Numarray的功能集成到Numeric包中来创建NumPy包。目前这个开源项目已经有非常多的贡献者。

环境搭建

在安装了pythonpip之后,一个命令搞定。

pip install numpy

然后我们进入Python交互式shell。

1
2
3
import numpy as np 
a = np.array([1,2,3])
print a

如果你能正确执行上述代码,那么你的numpy环境就已经搭建好了。

基本属性

ndarray.ndim:数组维度
ndarray.shape:数组行和列的长度
ndarray.size:同shape
ndarray.dtype:数组中元素的类型
ndarray.itemsize:数组中单个元素所占字节数

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
>>> import numpy as np
>>> a = np.arange(15).reshape(3, 5)
>>> a
array([[ 0, 1, 2, 3, 4],
[ 5, 6, 7, 8, 9],
[10, 11, 12, 13, 14]])
>>> a.shape
(3, 5)
>>> a.ndim
2
>>> a.dtype.name
'int64'
>>> a.itemsize
8
>>> a.size
15
>>> type(a)
<type 'numpy.ndarray'>
>>> b = np.array([6, 7, 8])
>>> b
array([6, 7, 8])
>>> type(b)
<type 'numpy.ndarray'>

创建数组

创建数组的方式有很多,我们直接看代码。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
>>> import numpy as np
>>> a = np.array([2,3,4])
>>> a
array([2, 3, 4])
>>> a.dtype
dtype('int64')
>>> b = np.array([1.2, 3.5, 5.1])
>>> b.dtype
dtype('float64')

>>> a = np.array(1,2,3,4) # WRONG
>>> a = np.array([1,2,3,4]) # RIGHT

>>> b = np.array([(1.5,2,3), (4,5,6)])
>>> b
array([[ 1.5, 2. , 3. ],
[ 4. , 5. , 6. ]])


>>> c = np.array( [ [1,2], [3,4] ], dtype=complex ) # 复数
>>> c
array([[ 1.+0.j, 2.+0.j],
[ 3.+0.j, 4.+0.j]])


>>> np.zeros( (3,4) )
array([[ 0., 0., 0., 0.],
[ 0., 0., 0., 0.],
[ 0., 0., 0., 0.]])
>>> np.ones( (2,3,4), dtype=np.int16 ) # dtype 也可以被指定
array([[[ 1, 1, 1, 1],
[ 1, 1, 1, 1],
[ 1, 1, 1, 1]],
[[ 1, 1, 1, 1],
[ 1, 1, 1, 1],
[ 1, 1, 1, 1]]], dtype=int16)
>>> np.empty( (2,3) ) # 未初始化,输出可能会稍许怪异
array([[ 3.73603959e-262, 6.02658058e-154, 6.55490914e-260],
[ 5.30498948e-313, 3.14673309e-307, 1.00000000e+000]])


>>> np.arange( 10, 30, 5 )
array([10, 15, 20, 25])
>>> np.arange( 0, 2, 0.3 ) # 可接受float型步长参数
array([ 0. , 0.3, 0.6, 0.9, 1.2, 1.5, 1.8])


>>> np.linspace( 0, 2, 9 ) # 从0到2的9个数字
array([ 0. , 0.25, 0.5 , 0.75, 1. , 1.25, 1.5 , 1.75, 2. ])


>>> np.random.rand(3,2)
array([[ 0.14022471, 0.96360618], #random
[ 0.37601032, 0.25528411], #random
[ 0.49313049, 0.94909878]]) #random

基本操作

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
>>> a = np.array( [20,30,40,50] )
>>> b = np.arange( 4 )
>>> b
array([0, 1, 2, 3])
>>> c = a-b
>>> c
array([20, 29, 38, 47])
>>> b**2
array([0, 1, 4, 9])
>>> 10*np.sin(a)
array([ 9.12945251, -9.88031624, 7.4511316 , -2.62374854])
>>> a<35
array([ True, True, False, False])


>>> A = np.array( [[1,1], [0,1]] )
>>> B = np.array( [[2,0], [3,4]] )
>>> A * B
array([[2, 0],
[0, 4]])
>>> A @ B
array([[5, 4],
[3, 4]])
>>> A.dot(B)
array([[5, 4],
[3, 4]])

>>> a = np.ones((2,3), dtype=int)
>>> b = np.random.random((2,3))
>>> a *= 3
>>> a
array([[3, 3, 3],
[3, 3, 3]])
>>> b += a
>>> b
array([[ 3.417022 , 3.72032449, 3.00011437],
[ 3.30233257, 3.14675589, 3.09233859]])
>>> a += b # b不会自动从float转变为int
Traceback (most recent call last):
...
TypeError: Cannot cast ufunc add output from dtype('float64') to dtype('int64') with casting rule 'same_kind'


>>> from numpy import pi
>>> a = np.ones(3, dtype=np.int32)
>>> b = np.linspace(0,pi,3)
>>> b.dtype.name
'float64'
>>> c = a+b
>>> c
array([ 1. , 2.57079633, 4.14159265])
>>> c.dtype.name
'float64'
>>> d = np.exp(c*1j)
>>> d
array([ 0.54030231+0.84147098j, -0.84147098+0.54030231j,
-0.54030231-0.84147098j])
>>> d.dtype.name
'complex128'


>>> a = np.random.random((2,3))
>>> a
array([[ 0.18626021, 0.34556073, 0.39676747],
[ 0.53881673, 0.41919451, 0.6852195 ]])
>>> a.sum()
2.5718191614547998
>>> a.min()
0.1862602113776709
>>> a.max()
0.6852195003967595


>>> b = np.arange(12).reshape(3,4)
>>> b
array([[ 0, 1, 2, 3],
[ 4, 5, 6, 7],
[ 8, 9, 10, 11]])
>>>
>>> b.sum(axis=0) # 每列之和
array([12, 15, 18, 21])
>>>
>>> b.min(axis=1) # 每行最小值
array([0, 4, 8])
>>>
>>> b.cumsum(axis=1) # 各行累加
array([[ 0, 1, 3, 6],
[ 4, 9, 15, 22],
[ 8, 17, 27, 38]])

通用数学函数

1
2
3
4
5
6
7
8
9
10
>>> B = np.arange(3)
>>> B
array([0, 1, 2])
>>> np.exp(B)
array([ 1. , 2.71828183, 7.3890561 ])
>>> np.sqrt(B)
array([ 0. , 1. , 1.41421356])
>>> C = np.array([2., -1., 4.])
>>> np.add(B, C)
array([ 2., 0., 6.])

索引,切片和迭代

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
>>> a = np.arange(10)**3
>>> a
array([ 0, 1, 8, 27, 64, 125, 216, 343, 512, 729])
>>> a[2]
8
>>> a[2:5]
array([ 8, 27, 64])
>>> a[:6:2] = -1000 # equivalent to a[0:6:2] = -1000; from start to position 6, exclusive, set every 2nd element to -1000
>>> a
array([-1000, 1, -1000, 27, -1000, 125, 216, 343, 512, 729])
>>> a[ : :-1] # reversed a
array([ 729, 512, 343, 216, 125, -1000, 27, -1000, 1, -1000])
>>> for i in a:
... print(i**(1/3.))
...
nan
1.0
nan
3.0
nan
5.0
6.0
7.0
8.0
9.0


>>> def f(x,y):
... return 10*x+y
...
>>> b = np.fromfunction(f,(5,4),dtype=int)
>>> b
array([[ 0, 1, 2, 3],
[10, 11, 12, 13],
[20, 21, 22, 23],
[30, 31, 32, 33],
[40, 41, 42, 43]])
>>> b[2,3]
23
>>> b[0:5, 1] # 1到5行第二个
array([ 1, 11, 21, 31, 41])
>>> b[ : ,1] # 每行第二个
array([ 1, 11, 21, 31, 41])
>>> b[1:3, : ] # 2到3行
array([[10, 11, 12, 13],
[20, 21, 22, 23]])
>>> b[-1] # 最后一行
array([40, 41, 42, 43])


>>> c = np.array( [[[ 0, 1, 2], # 3D数组
... [ 10, 12, 13]],
... [[100,101,102],
... [110,112,113]]])
>>> c.shape
(2, 2, 3)
>>> c[1,...] # 同 c[1,:,:] 和 c[1]
array([[100, 101, 102],
[110, 112, 113]])
>>> c[...,2] # 同 c[:,:,2]
array([[ 2, 13],
[102, 113]])


>>> for row in b:
... print(row)
...
[0 1 2 3]
[10 11 12 13]
[20 21 22 23]
[30 31 32 33]
[40 41 42 43]


>>> for element in b.flat:
... print(element)
...
0
1
2
3
10
11
12
13
20
21
22
23
30
31
32
33
40
41
42
43

矩阵处理

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
>>> a = np.floor(10*np.random.random((3,4)))
>>> a
array([[ 2., 8., 0., 6.],
[ 4., 5., 1., 1.],
[ 8., 9., 3., 6.]])
>>> a.shape
(3, 4)


>>> a.ravel() # 返回扁平化的矩阵
array([ 2., 8., 0., 6., 4., 5., 1., 1., 8., 9., 3., 6.])
>>> a.reshape(6,2) # 改变矩阵的形状
array([[ 2., 8.],
[ 0., 6.],
[ 4., 5.],
[ 1., 1.],
[ 8., 9.],
[ 3., 6.]])
>>> a.T # 矩阵的转置
array([[ 2., 4., 8.],
[ 8., 5., 9.],
[ 0., 1., 3.],
[ 6., 1., 6.]])
>>> a.T.shape
(4, 3)
>>> a.shape
(3, 4)


>>> a
array([[ 2., 8., 0., 6.],
[ 4., 5., 1., 1.],
[ 8., 9., 3., 6.]])
>>> a.resize((2,6))
>>> a
array([[ 2., 8., 0., 6., 4., 5.],
[ 1., 1., 8., 9., 3., 6.]])


>>> a.reshape(3,-1)
array([[ 2., 8., 0., 6.],
[ 4., 5., 1., 1.],
[ 8., 9., 3., 6.]])

数组的分割

1
2
3
4
5
6
7
8
9
10
11
12
13
14
>>> a = np.floor(10*np.random.random((2,12)))
>>> a
array([[ 9., 5., 6., 3., 6., 8., 0., 7., 9., 7., 2., 7.],
[ 1., 4., 9., 2., 2., 1., 0., 6., 2., 2., 4., 0.]])
>>> np.hsplit(a,3)
[array([[ 9., 5., 6., 3.],
[ 1., 4., 9., 2.]]), array([[ 6., 8., 0., 7.],
[ 2., 1., 0., 6.]]), array([[ 9., 7., 2., 7.],
[ 2., 2., 4., 0.]])]
>>> np.hsplit(a,(3,4))
[array([[ 9., 5., 6.],
[ 1., 4., 9.]]), array([[ 3.],
[ 2.]]), array([[ 6., 8., 0., 7., 9., 7., 2., 7.],
[ 2., 1., 0., 6., 2., 2., 4., 0.]])]

复制

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
>>> a = np.arange(12)
>>> b = a # 并没有创建新数组
>>> b is a
True
>>> b.shape = 3,4
>>> a.shape
(3, 4)

>>> def f(x):
... print(id(x))
...
>>> id(a)
148293216
>>> f(a)
148293216


>>> c = a.view()
>>> c is a
False
>>> c.base is a
True
>>> c.flags.owndata
False
>>>
>>> c.shape = 2,6 # a的形状不变
>>> a.shape
(3, 4)
>>> c[0,4] = 1234 # a的数据会变
>>> a
array([[ 0, 1, 2, 3],
[1234, 5, 6, 7],
[ 8, 9, 10, 11]])


>>> s = a[ : , 1:3] # 广播
>>> s[:] = 10
>>> a
array([[ 0, 10, 10, 3],
[1234, 10, 10, 7],
[ 8, 10, 10, 11]])


>>> d = a.copy() # 深复制
>>> d is a
False
>>> d.base is a
False
>>> d[0,0] = 9999
>>> a
array([[ 0, 10, 10, 3],
[1234, 10, 10, 7],
[ 8, 10, 10, 11]])

索引技巧

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
>>> a = np.arange(12)**2                       # 平方
array([ 0, 1, 4, 9, 16, 25, 36, 49, 64, 81, 100, 121],
dtype=int32)
>>> i = np.array( [ 1,1,3,8,5 ] )
>>> a[i] # 对应位置元素
array([ 1, 1, 9, 64, 25])
>>>
>>> j = np.array( [ [ 3, 4], [ 9, 7 ] ] )
>>> a[j] # 对应位置元素
array([[ 9, 16],
[81, 49]])


>>> palette = np.array( [ [0,0,0], # black
... [255,0,0], # red
... [0,255,0], # green
... [0,0,255], # blue
... [255,255,255] ] ) # white
>>> image = np.array( [ [ 0, 1, 2, 0 ],
... [ 0, 3, 4, 0 ] ] )
>>> palette[image]
array([[[ 0, 0, 0],
[255, 0, 0],
[ 0, 255, 0],
[ 0, 0, 0]],
[[ 0, 0, 0],
[ 0, 0, 255],
[255, 255, 255],
[ 0, 0, 0]]])


>>> a = np.arange(12).reshape(3,4)
>>> a
array([[ 0, 1, 2, 3],
[ 4, 5, 6, 7],
[ 8, 9, 10, 11]])
>>> i = np.array( [ [0,1],
... [1,2] ] )
>>> j = np.array( [ [2,1],
... [3,3] ] )
>>> a[i]
array([[[ 0, 1, 2, 3],
[ 4, 5, 6, 7]],

[[ 4, 5, 6, 7],
[ 8, 9, 10, 11]]])
>>> a[i,j]
array([[ 2, 5],
[ 7, 11]])
>>>
>>> a[i,2]
array([[ 2, 6],
[ 6, 10]])
>>>
>>> a[:,j]
array([[[ 2, 1],
[ 3, 3]],
[[ 6, 5],
[ 7, 7]],
[[10, 9],
[11, 11]]])


>>> l = [i,j]
>>> a[l]
array([[ 2, 5],
[ 7, 11]])


>>> s = np.array( [i,j] )
>>> a[s]
Traceback (most recent call last):
File "<stdin>", line 1, in ?
IndexError: index (3) out of range (0<=index<=2) in dimension 0
>>>
>>> a[tuple(s)] # 同 a[i,j]
array([[ 2, 5],
[ 7, 11]])


>>> time = np.linspace(20, 145, 5)
>>> data = np.sin(np.arange(20)).reshape(5,4)
>>> time
array([ 20. , 51.25, 82.5 , 113.75, 145. ])
>>> data
array([[ 0. , 0.84147098, 0.90929743, 0.14112001],
[-0.7568025 , -0.95892427, -0.2794155 , 0.6569866 ],
[ 0.98935825, 0.41211849, -0.54402111, -0.99999021],
[-0.53657292, 0.42016704, 0.99060736, 0.65028784],
[-0.28790332, -0.96139749, -0.75098725, 0.14987721]])
>>>
>>> ind = data.argmax(axis=0) # 各行最大值索引
>>> ind
array([2, 0, 3, 1])
>>>
>>> time_max = time[ind]
>>>
>>> data_max = data[ind, range(data.shape[1])]
>>>
>>> time_max
array([ 82.5 , 20. , 113.75, 51.25])
>>> data_max
array([ 0.98935825, 0.84147098, 0.99060736, 0.6569866 ])
>>>
>>> np.all(data_max == data.max(axis=0))
True


>>> a = np.arange(5)
>>> a
array([0, 1, 2, 3, 4])
>>> a[[1,3,4]] = 0
>>> a
array([0, 0, 2, 0, 0])


>>> a = np.arange(5)
>>> a[[0,0,2]]=[1,2,3]
>>> a
array([2, 1, 3, 3, 4])


>>> a = np.arange(5)
>>> a[[0,0,2]]+=1
>>> a
array([1, 1, 3, 3, 4])


>>> a = np.arange(12).reshape(3,4)
>>> b = a > 4
>>> b
array([[False, False, False, False],
[False, True, True, True],
[ True, True, True, True]])
>>> a[b]
array([ 5, 6, 7, 8, 9, 10, 11])


>>> a[b] = 0 # 大于4均变成0
>>> a
array([[0, 1, 2, 3],
[4, 0, 0, 0],
[0, 0, 0, 0]])

>>> a = np.arange(12).reshape(3,4)
>>> b1 = np.array([False,True,True]) # first dim selection
>>> b2 = np.array([True,False,True,False]) # second dim selection
>>>
>>> a
array([[ 0, 1, 2, 3],
[ 4, 5, 6, 7],
[ 8, 9, 10, 11]])
>>>
>>> a[b1,:] # 选择行
array([[ 4, 5, 6, 7],
[ 8, 9, 10, 11]])
>>>
>>> a[b1] # 同上
array([[ 4, 5, 6, 7],
[ 8, 9, 10, 11]])
>>>
>>> a[:,b2] # 选择列
array([[ 0, 2],
[ 4, 6],
[ 8, 10]])
>>>
>>> a[b1,b2] # 很奇怪的选择
array([ 4, 10])

曼德布洛特集合

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
import numpy as np
import matplotlib.pyplot as plt
def mandelbrot( h,w, maxit=20 ):
"""Returns an image of the Mandelbrot fractal of size (h,w)."""
y,x = np.ogrid[ -1.4:1.4:h*1j, -2:0.8:w*1j ]
c = x+y*1j
z = c
divtime = maxit + np.zeros(z.shape, dtype=int)
for i in range(maxit):
z = z**2 + c
diverge = z*np.conj(z) > 2**2
div_now = diverge & (divtime==maxit)
divtime[div_now] = i
z[diverge] = 2
return divtime
plt.imshow(mandelbrot(400,400))
plt.show()

Mandelbrot set

线性代数

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
>>> import numpy as np
>>> a = np.array([[1.0, 2.0], [3.0, 4.0]])
>>> print(a)
[[ 1. 2.]
[ 3. 4.]]

>>> a.transpose()
array([[ 1., 3.],
[ 2., 4.]])

>>> np.linalg.inv(a)
array([[-2. , 1. ],
[ 1.5, -0.5]])

>>> u = np.eye(2) # 2x2 单位矩阵; "eye" 表示 "I",单位矩阵
>>> u
array([[ 1., 0.],
[ 0., 1.]])
>>> j = np.array([[0.0, -1.0], [1.0, 0.0]])

>>> j @ j # 矩阵
array([[-1., 0.],
[ 0., -1.]])

>>> np.trace(u) # 计算对角线元素的和
2.0

>>> y = np.array([[5.], [7.]])
>>> np.linalg.solve(a, y)
array([[-3.],
[ 4.]])

>>> np.linalg.eig(j)
(array([ 0.+1.j, 0.-1.j]), array([[ 0.70710678+0.j , 0.70710678-0.j ],
[ 0.00000000-0.70710678j, 0.00000000+0.70710678j]]))

小技巧

“自动”变型

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
>>> a = np.arange(30)
>>> a.shape = 2,-1,3 # -1 means "whatever is needed"
>>> a.shape
(2, 5, 3)
>>> a
array([[[ 0, 1, 2],
[ 3, 4, 5],
[ 6, 7, 8],
[ 9, 10, 11],
[12, 13, 14]],
[[15, 16, 17],
[18, 19, 20],
[21, 22, 23],
[24, 25, 26],
[27, 28, 29]]])

处理直方图

1
2
3
4
5
6
7
8
>>> import numpy as np
>>> import matplotlib.pyplot as plt
>>> # 方差 0.5^2, 均值 2
>>> mu, sigma = 2, 0.5
>>> v = np.random.normal(mu,sigma,10000)
>>> # 标准直方图
>>> plt.hist(v, bins=50, density=1)
>>> plt.show()

histogram

1
2
3
4
>>> # 使用numpy计算
>>> (n, bins) = np.histogram(v, bins=50, density=True)
>>> plt.plot(.5*(bins[1:]+bins[:-1]), n)
>>> plt.show()

histogram_numpy