PPO (Proximal Policy Optimization)
병렬 환경
# Build 4 parallel CartPole environments so rollouts are collected faster.
from stable_baselines3.common.env_util import make_vec_env

vec_env = make_vec_env(env_id="CartPole-v1", n_envs=4)
학습
# Train a PPO agent with an MLP policy on the vectorized environments.
from stable_baselines3 import PPO

model = PPO(policy="MlpPolicy", env=vec_env, verbose=1)
# progress_bar=True shows a live training progress bar (requires tqdm/rich).
model.learn(total_timesteps=25_000, progress_bar=True)
시각화
# Visualization: roll out the trained model in a render-capable environment.
# NOTE(review): `tqdm` is imported but not used in this chunk — possibly a
# leftover from the progress bar above; verify before removing.
import tqdm
import gymnasium as gym
# render_mode="rgb_array" makes env.render() return frames as arrays
# instead of opening an on-screen window.
env = gym.make("CartPole-v1", render_mode="rgb_array")
# NOTE(review): `render_episode` is not defined anywhere in this chunk —
# presumably a helper defined elsewhere (e.g. earlier notebook cell); confirm.
render_episode(env, model)