批量下载Github指定仓库的所有Release
今天遇到个问题,需要批量拖仓库的Release版本下来一个个实验。手工简直太低效了。
于是喊了GPT来协助写个代码,很完美的完成需求。
import sys
import requests
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry
from urllib.parse import urlparse
import concurrent.futures
from pathlib import Path
def create_session(retries=5, backoff_factor=0.3, status_forcelist=(500, 502, 504)):
    """Create a requests session that automatically retries failed requests.

    Args:
        retries: Maximum retry attempts (applied to total/read/connect counts).
        backoff_factor: Base factor for exponential backoff between retries.
        status_forcelist: HTTP status codes that trigger a retry.

    Returns:
        A configured requests.Session with the retry adapter mounted for
        both http:// and https:// URLs.
    """
    session = requests.Session()
    # NOTE(review): the parameter was misspelled "status_formalist"; renamed to
    # the conventional urllib3 name. The only in-file caller uses the defaults.
    retry_strategy = Retry(
        total=retries,
        read=retries,
        connect=retries,
        backoff_factor=backoff_factor,
        status_forcelist=status_forcelist,
    )
    adapter = HTTPAdapter(max_retries=retry_strategy)
    session.mount('http://', adapter)
    session.mount('https://', adapter)
    return session
def extract_user_repo(github_clone_url):
    """Extract ``(username, repository)`` from a GitHub clone URL.

    A trailing ``.git`` suffix on the repository name is stripped.
    """
    segments = urlparse(github_clone_url).path.strip('/').split('/')
    user = segments[0]
    repo = segments[1]
    if repo.endswith('.git'):
        repo = repo[:-4]
    return user, repo
def download_asset(session, asset_url, file_path):
    """Download a single release asset to ``file_path``.

    Args:
        session: A requests.Session (typically configured with retries).
        asset_url: Direct download URL of the asset.
        file_path: pathlib.Path destination for the downloaded file.

    Failures are printed rather than raised so that one failed asset does
    not abort the other concurrent downloads.
    """
    print(f"Downloading {file_path.name}...")
    try:
        # timeout=(connect, read) so a stalled server cannot hang a worker
        # forever; `with` guarantees the response/connection is released.
        with session.get(asset_url, stream=True, timeout=(10, 60)) as response:
            response.raise_for_status()
            with file_path.open('wb') as file:
                for chunk in response.iter_content(chunk_size=8192):
                    if chunk:  # skip keep-alive chunks
                        file.write(chunk)
        print(f"Downloaded {file_path.name}")
    except requests.RequestException as e:
        print(f"Failed to download {file_path.name}: {e}")
def download_releases(github_clone_url):
    """Download every release asset of a repository into ``./<repo>/``.

    Args:
        github_clone_url: A GitHub clone/repo URL such as
            ``https://github.com/user/repo.git``.

    Raises:
        requests.HTTPError: If the releases API returns an error status
            (e.g. 404 for an unknown repo, 403 when rate-limited).

    Assets are downloaded concurrently (5 workers) into a directory named
    after the repository, created in the current working directory.
    """
    session = create_session()
    user, repo = extract_user_repo(github_clone_url)

    # The releases endpoint is paginated (30 per page by default); follow the
    # rel="next" Link headers so repositories with many releases are covered.
    releases = []
    url = f"https://api.github.com/repos/{user}/{repo}/releases?per_page=100"
    while url:
        response = session.get(url, timeout=(10, 30))
        response.raise_for_status()  # fail loudly on 404 / rate limiting
        releases.extend(response.json())
        url = response.links.get('next', {}).get('url')

    directory = Path.cwd() / repo
    directory.mkdir(exist_ok=True)

    total = sum(len(release['assets']) for release in releases)
    print(f"Total assets to download: {total}")

    with concurrent.futures.ThreadPoolExecutor(max_workers=5) as executor:
        futures = [
            executor.submit(download_asset, session,
                            asset['browser_download_url'], directory / asset['name'])
            for release in releases
            for asset in release['assets']
        ]
        concurrent.futures.wait(futures)
if __name__ == "__main__":
    # Exactly one positional argument is required: the GitHub repository URL.
    args = sys.argv[1:]
    if len(args) != 1:
        print("Usage: python script.py <github_repo_url>")
        sys.exit(1)
    download_releases(args[0])
使用时直接用 python 运行即可,例如:python script.py https://github.com/user/repo.git
运行后会以项目名称创建文件夹,所有 Release 文件都会保存在里面