动手学深度学习视频地址:https://www.bilibili.com/list/1567748478
教材地址:https://zh.d2l.ai/

本文记录学习笔记(精简版)。友情提醒,建议看视频学习,看教材会睡着。

conda安装

https://conda.io/en/latest/miniconda.html

# Create and activate an isolated conda environment named d2l (Python 3.9).
conda create --name d2l python=3.9 -y
conda activate d2l
# Install PyTorch from the cu121 wheel index (presumably the CUDA 12.1 builds; pick the index matching your driver).
pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu121
# Install the d2l helper package, pinned to version 0.17.6.
pip install d2l==0.17.6
# Download the book's notebook archive into d2l-zh, unpack it, and delete the zip.
mkdir d2l-zh && cd d2l-zh
curl https://zh-v2.d2l.ai/d2l-zh-2.0.0.zip -o d2l-zh.zip
unzip d2l-zh.zip && rm d2l-zh.zip
# Start Jupyter from the pytorch folder of the unpacked archive.
cd pytorch
jupyter notebook

预备知识

数据操作


# The import must be active: the statements below call torch.arange and
# torch.tensor, so leaving it commented out raises NameError.
import torch

# Basic tensor construction and inspection (disabled reference examples):
# x = torch.arange(3)
# print(x.shape)
# print(x.numel())
# print(x.reshape(-1, 1))
# print(torch.zeros((1, 2, 3)))
# print(torch.ones((1, 2, 3)))
# print(torch.randn(3, 4))
# print(torch.tensor([[1, 2, 3], [1, 2, 3]]))
# print(torch.exp(x))

# X: the values 0..11 as float32, laid out as a 3x4 matrix.
X = torch.arange(12, dtype=torch.float32).reshape((3, 4))
print(X)
# Y: a 3x4 matrix built from a nested Python list.
Y = torch.tensor([[2.0, 1, 4, 3], [1, 2, 3, 4], [4, 3, 2, 1]])
print(Y)
# Concatenation, element-wise comparison and reduction (disabled examples):
# print('----')
# print(torch.cat((X, Y), dim=0))
# print(torch.cat((X, Y), dim=1))
# print(X == Y)
# print(X.sum())

# Indexing: the last row, then rows 1 and 2 (the slice end is exclusive).
print(X[-1])
print(X[1:3])

数据预处理

import os
import pandas as pd
import torch

os.makedirs(os.path.join('..', 'data'), exist_ok=True)
data_file = os.path.join('..', 'data', 'house_tiny.csv')
# Create the tiny sample dataset only when it is missing, so the read below
# works on a fresh checkout and an existing file is never overwritten.
if not os.path.exists(data_file):
    with open(data_file, 'w') as f:
        f.write('NumRooms,Alley,Price\n')  # column names
        f.write('NA,Pave,127500\n')  # each row is one data sample
        f.write('2,NA,106000\n')
        f.write('4,NA,178100\n')
        f.write('NA,NA,140000\n')

data = pd.read_csv(data_file)
print(data)
# Split the features (first two columns) from the price column.
inputs, outputs = data.iloc[:, 0:2], data.iloc[:, 2]
# Fill missing numeric values with the column mean. numeric_only=True is
# required because Alley holds strings: averaging it raises TypeError on
# pandas >= 2.0. Missing Alley values are handled by get_dummies below.
inputs = inputs.fillna(inputs.mean(numeric_only=True))
print(inputs)
# One-hot encode the categorical column; dummy_na=True gives NaN its own
# indicator column.
inputs = pd.get_dummies(inputs, dummy_na=True)
print(inputs)
# dtype=float also converts the boolean indicator columns to 0.0 / 1.0.
X = torch.tensor(inputs.to_numpy(dtype=float))
y = torch.tensor(outputs.to_numpy(dtype=float))
print(X, y)

线性代数

import torch

# Matrices: the values 0..19 as a 5x4 float32 matrix, printed with its transpose.
A = torch.arange(20, dtype=torch.float32).reshape(5, 4)
print(A, A.T)

# Element-wise product of A with a copy of itself.
B = A.clone()
elementwise = A * B
print(elementwise)

# Arithmetic between a scalar and a tensor.
a = 2  # scalar
X = torch.arange(24).reshape(2, 3, 4)
print(X)
shifted, scaled = a + X, a * X
print(shifted, scaled)

# Sums: over all elements, along each axis, with the reduced axis kept,
# and cumulative along axis 0.
x = torch.arange(4, dtype=torch.float32)  # vector
print(x)
print(x.sum())
print(A.sum(dim=0))
print(A.sum(dim=1))
print(A.sum(dim=1, keepdim=True))
print(A.cumsum(dim=0))

# Dot product of two vectors.
y = torch.ones(4, dtype=torch.float32)
print(x, y, x @ y)

# Matrix-vector product: the number of columns of A must equal len(x).
print(A @ x)

# Matrix-matrix product: (5x4) @ (4x3).
B = torch.ones(4, 3)
print(A @ B)

# L2 norm of a vector.
u = torch.tensor([3.0, -4.0])
print(u.norm())
# Norm of a matrix.
all_ones = torch.ones((4, 9))
print(all_ones.norm())

微积分

import numpy as np
from d2l.torch import plot


# The function whose derivative is examined below.
def f(x):
    """Evaluate 3x^2 - 4x at x (a scalar or a NumPy array)."""
    squared_term = 3 * x ** 2
    linear_term = 4 * x
    return squared_term - linear_term


# Definition of the derivative as a difference quotient.
def numerical_lim(f, x, h):
    """Return the forward difference quotient (f(x + h) - f(x)) / h."""
    rise = f(x + h) - f(x)
    return rise / h


# Let h approach 0 and print the difference quotient of f at x=1 each time.
h = 0.1
for _ in range(5):
    quotient = numerical_lim(f, 1, h)
    print(f'h={h:.5f}, numerical limit={quotient:.5f}')
    h = h * 0.1

# Plot f together with the line y = 2x - 3, its tangent at x=1.
x = np.arange(0, 3, 0.1)
curves = [f(x), 2 * x - 3]
plot(x, curves, 'x', 'f(x)', legend=['f(x)', 'Tangent line (x=1)'])

自动微分

import torch

x = torch.arange(4.0)
print(x)

# Enable gradient tracking in place; equivalent to
# x = torch.arange(4.0, requires_grad=True).
x.requires_grad_(True)
print(x.grad)  # None by default

y = 2 * x.dot(x)
print(y)

# Automatically compute the gradient of y with respect to each component of x.
y.backward()
print(x.grad)

# Another function of x. Gradients accumulate, so clear the old values first.
x.grad.zero_()
y = x.sum()
y.backward()
print(x.grad)

# When y is not a scalar, the most natural derivative of vector y with respect
# to vector x is a matrix; for higher-order, higher-dimensional y and x the
# result can be a higher-order tensor.
x.grad.zero_()
y = x * x
# Summing first is equivalent to y.backward(torch.ones(len(x))).
total = y.sum()
total.backward()
print(x.grad)

概率

import torch
from torch.distributions import multinomial
from d2l import torch as d2l

# Probabilities of a fair die: 1/6 for each of the six faces.
fair_probs = torch.ones([6]) / 6
# A multinomial distribution assigns probabilities to a set of discrete choices.
# counts = multinomial.Multinomial(1000, fair_probs).sample()  # simulate 1000 rolls
# print(counts / 1000)  # relative frequencies as estimates

# Run 500 groups of experiments, drawing 10 samples in each group.
ten_rolls = multinomial.Multinomial(10, fair_probs)
counts = ten_rolls.sample((500,))
cum_counts = counts.cumsum(dim=0)
# Normalize each row of running totals so it sums to 1.
estimates = cum_counts / cum_counts.sum(dim=1, keepdims=True)

d2l.set_figsize((6, 4.5))
# One curve per face: the running estimate of its probability across groups.
for face in range(1, 7):
    d2l.plt.plot(estimates[:, face - 1].numpy(), label=f"P(die={face})")
# Dashed reference line at 0.167, approximately 1/6.
d2l.plt.axhline(y=0.167, color='black', linestyle='dashed')
axes = d2l.plt.gca()
axes.set_xlabel('Groups of experiments')
axes.set_ylabel('Estimated probability')
d2l.plt.legend()

P(X=a):表示随机变量X取值为a时的概率
P(A=a,B=b):联合概率。表示A=a且B=b同时发生的概率
P(B=b|A=a):条件概率。表示B=b的概率,前提是A=a已经发生
...
贝叶斯定理
...略

发表评论