chore: add virtual environment to repository

- Add the backend_service/venv virtual environment
- Includes all Python dependency packages
- Note: the virtual environment is ~393MB and contains 12655 files
2025-12-03 10:19:25 +08:00
parent a6c2027caa
commit c4f851d387
12655 changed files with 3009376 additions and 0 deletions


@@ -0,0 +1,13 @@
# Copyright 2025 The HuggingFace Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


@@ -0,0 +1,173 @@
# Copyright 2022 The HuggingFace Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Contains CLI utilities (styling, helpers)."""
import importlib.metadata
import os
import time
from enum import Enum
from pathlib import Path
from typing import TYPE_CHECKING, Annotated, Optional
import click
import typer
from huggingface_hub import __version__, constants
from huggingface_hub.utils import ANSI, get_session, hf_raise_for_status, installation_method, logging
logger = logging.get_logger()
if TYPE_CHECKING:
from huggingface_hub.hf_api import HfApi
def get_hf_api(token: Optional[str] = None) -> "HfApi":
# Import here to avoid circular import
from huggingface_hub.hf_api import HfApi
return HfApi(token=token, library_name="huggingface-cli", library_version=__version__)
#### TYPER UTILS
class AlphabeticalMixedGroup(typer.core.TyperGroup):
"""
Typer Group that lists commands and sub-apps together, in alphabetical order.
"""
def list_commands(self, ctx: click.Context) -> list[str]: # type: ignore[name-defined]
# click.Group stores both commands and subgroups in `self.commands`
return sorted(self.commands.keys())
def typer_factory(help: str) -> typer.Typer:
return typer.Typer(
help=help,
add_completion=True,
no_args_is_help=True,
cls=AlphabeticalMixedGroup,
# Disable rich completely for consistent experience
rich_markup_mode=None,
rich_help_panel=None,
pretty_exceptions_enable=False,
)
class RepoType(str, Enum):
model = "model"
dataset = "dataset"
space = "space"
RepoIdArg = Annotated[
str,
typer.Argument(
help="The ID of the repo (e.g. `username/repo-name`).",
),
]
RepoTypeOpt = Annotated[
RepoType,
typer.Option(
help="The type of repository (model, dataset, or space).",
),
]
TokenOpt = Annotated[
Optional[str],
typer.Option(
help="A User Access Token generated from https://huggingface.co/settings/tokens.",
),
]
PrivateOpt = Annotated[
bool,
typer.Option(
help="Whether to create a private repo if repo doesn't exist on the Hub. Ignored if the repo already exists.",
),
]
RevisionOpt = Annotated[
Optional[str],
typer.Option(
help="Git revision id which can be a branch name, a tag, or a commit hash.",
),
]
### PyPI VERSION CHECKER
def check_cli_update() -> None:
"""
Check whether a newer version of `huggingface_hub` is available on PyPI.
If a newer version is found, notify the user and suggest updating.
If the current version is a pre-release (e.g. `1.0.0.rc1`) or a dev version (e.g. `1.0.0.dev1`), no check is performed.
This function is called at the entry point of the CLI. It only performs the check once every 24 hours, and any error
during the check is caught and logged, to avoid breaking the CLI.
"""
try:
_check_cli_update()
except Exception:
# We don't want the CLI to fail on version checks, no matter the reason.
logger.debug("Error while checking for CLI update.", exc_info=True)
def _check_cli_update() -> None:
current_version = importlib.metadata.version("huggingface_hub")
# Skip if current version is a pre-release or dev version
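# (Illustration, not from the original file: versions like "1.0.0rc1" and
# "1.0.0.dev0" contain the "rc"/"dev" substrings, so the check below returns
# early for them.)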
if any(tag in current_version for tag in ["rc", "dev"]):
return
# Skip if already checked in the last 24 hours
if os.path.exists(constants.CHECK_FOR_UPDATE_DONE_PATH):
mtime = os.path.getmtime(constants.CHECK_FOR_UPDATE_DONE_PATH)
if (time.time() - mtime) < 24 * 3600:
return
# Touch the file to mark that we did the check now
Path(constants.CHECK_FOR_UPDATE_DONE_PATH).touch()
# Check latest version from PyPI
response = get_session().get("https://pypi.org/pypi/huggingface_hub/json", timeout=2)
hf_raise_for_status(response)
data = response.json()
latest_version = data["info"]["version"]
# If latest version is different from current, notify user
if current_version != latest_version:
method = installation_method()
if method == "brew":
update_command = "brew upgrade huggingface-cli"
elif method == "hf_installer" and os.name == "nt":
update_command = 'powershell -NoProfile -Command "iwr -useb https://hf.co/cli/install.ps1 | iex"'
elif method == "hf_installer":
update_command = "curl -LsSf https://hf.co/cli/install.sh | bash -"
else: # unknown => likely pip
update_command = "pip install -U huggingface_hub"
click.echo(
ANSI.yellow(
f"A new version of huggingface_hub ({latest_version}) is available! "
f"You are using version {current_version}.\n"
f"To update, run: {ANSI.bold(update_command)}\n",
)
)


@@ -0,0 +1,147 @@
# Copyright 2020 The HuggingFace Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Contains commands to authenticate to the Hugging Face Hub and interact with your repositories.
Usage:
# login and save token locally.
hf auth login --token=hf_*** --add-to-git-credential
# switch between tokens
hf auth switch
# list all tokens
hf auth list
# logout from all tokens
hf auth logout
# check which account you are logged in as
hf auth whoami
"""
from typing import Annotated, Optional
import typer
from huggingface_hub.constants import ENDPOINT
from huggingface_hub.errors import HfHubHTTPError
from huggingface_hub.hf_api import whoami
from .._login import auth_list, auth_switch, login, logout
from ..utils import ANSI, get_stored_tokens, get_token, logging
from ._cli_utils import TokenOpt, typer_factory
logger = logging.get_logger(__name__)
auth_cli = typer_factory(help="Manage authentication (login, logout, etc.).")
@auth_cli.command("login", help="Login using a token from huggingface.co/settings/tokens")
def auth_login(
token: TokenOpt = None,
add_to_git_credential: Annotated[
bool,
typer.Option(
help="Save to git credential helper. Useful only if you plan to run git commands directly.",
),
] = False,
) -> None:
login(token=token, add_to_git_credential=add_to_git_credential)
@auth_cli.command("logout", help="Logout from a specific token")
def auth_logout(
token_name: Annotated[
Optional[str],
typer.Option(
help="Name of token to logout",
),
] = None,
) -> None:
logout(token_name=token_name)
def _select_token_name() -> Optional[str]:
token_names = list(get_stored_tokens().keys())
if not token_names:
logger.error("No stored tokens found. Please login first.")
return None
print("Available stored tokens:")
for i, token_name in enumerate(token_names, 1):
print(f"{i}. {token_name}")
while True:
try:
choice = input("Enter the number of the token to switch to (or 'q' to quit): ")
if choice.lower() == "q":
return None
index = int(choice) - 1
if 0 <= index < len(token_names):
return token_names[index]
else:
print("Invalid selection. Please try again.")
except ValueError:
print("Invalid input. Please enter a number or 'q' to quit.")
@auth_cli.command("switch", help="Switch between access tokens")
def auth_switch_cmd(
token_name: Annotated[
Optional[str],
typer.Option(
help="Name of the token to switch to",
),
] = None,
add_to_git_credential: Annotated[
bool,
typer.Option(
help="Save to git credential helper. Useful only if you plan to run git commands directly.",
),
] = False,
) -> None:
if token_name is None:
token_name = _select_token_name()
if token_name is None:
print("No token name provided. Aborting.")
raise typer.Exit()
auth_switch(token_name, add_to_git_credential=add_to_git_credential)
@auth_cli.command("list", help="List all stored access tokens")
def auth_list_cmd() -> None:
auth_list()
@auth_cli.command("whoami", help="Find out which huggingface.co account you are logged in as.")
def auth_whoami() -> None:
token = get_token()
if token is None:
print("Not logged in")
raise typer.Exit()
try:
info = whoami(token)
print(ANSI.bold("user: "), info["name"])
orgs = [org["name"] for org in info["orgs"]]
if orgs:
print(ANSI.bold("orgs: "), ",".join(orgs))
if ENDPOINT != "https://huggingface.co":
print(f"Authenticated through private endpoint: {ENDPOINT}")
except HfHubHTTPError as e:
print(e)
print(ANSI.red(e.response.text))
raise typer.Exit(code=1)


@@ -0,0 +1,841 @@
# coding=utf-8
# Copyright 2025-present, the HuggingFace Inc. team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Contains the 'hf cache' command group with cache management subcommands."""
import csv
import json
import re
import sys
import time
from collections import defaultdict
from dataclasses import dataclass
from enum import Enum
from typing import Annotated, Any, Callable, Dict, List, Mapping, Optional, Tuple
import typer
from ..utils import (
ANSI,
CachedRepoInfo,
CachedRevisionInfo,
CacheNotFound,
HFCacheInfo,
_format_size,
scan_cache_dir,
tabulate,
)
from ..utils._parsing import parse_duration, parse_size
from ._cli_utils import RepoIdArg, RepoTypeOpt, RevisionOpt, TokenOpt, get_hf_api, typer_factory
cache_cli = typer_factory(help="Manage local cache directory.")
#### Cache helper utilities
class OutputFormat(str, Enum):
table = "table"
json = "json"
csv = "csv"
@dataclass(frozen=True)
class _DeletionResolution:
revisions: frozenset[str]
selected: dict[CachedRepoInfo, frozenset[CachedRevisionInfo]]
missing: tuple[str, ...]
_FILTER_PATTERN = re.compile(r"^(?P<key>[a-zA-Z_]+)\s*(?P<op>==|!=|>=|<=|>|<|=)\s*(?P<value>.+)$")
_ALLOWED_OPERATORS = {"=", "!=", ">", "<", ">=", "<="}
_FILTER_KEYS = {"accessed", "modified", "refs", "size", "type"}
_SORT_KEYS = {"accessed", "modified", "name", "size"}
_SORT_PATTERN = re.compile(r"^(?P<key>[a-zA-Z_]+)(?::(?P<order>asc|desc))?$")
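# Example (added for illustration): _FILTER_PATTERN splits "size>=500MB" into
# key="size", op=">=", value="500MB"; _SORT_PATTERN splits "modified:asc" into
# key="modified", order="asc" (the order part is optional).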
_SORT_DEFAULT_ORDER = {
# Default ordering: accessed/modified/size are descending (newest/biggest first), name is ascending
"accessed": "desc",
"modified": "desc",
"size": "desc",
"name": "asc",
}
# Dynamically generate SortOptions enum from _SORT_KEYS
_sort_options_dict = {}
for key in sorted(_SORT_KEYS):
_sort_options_dict[key] = key
_sort_options_dict[f"{key}_asc"] = f"{key}:asc"
_sort_options_dict[f"{key}_desc"] = f"{key}:desc"
SortOptions = Enum("SortOptions", _sort_options_dict, type=str, module=__name__) # type: ignore
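# Example (added for illustration): the generated enum has members such as
# SortOptions.size == "size", SortOptions.size_asc == "size:asc" and
# SortOptions.size_desc == "size:desc", and likewise for the other sort keys.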
@dataclass(frozen=True)
class CacheDeletionCounts:
"""Simple counters summarizing cache deletions for CLI messaging."""
repo_count: int
partial_revision_count: int
total_revision_count: int
CacheEntry = Tuple[CachedRepoInfo, Optional[CachedRevisionInfo]]
RepoRefsMap = Dict[CachedRepoInfo, frozenset[str]]
def summarize_deletions(
selected_by_repo: Mapping[CachedRepoInfo, frozenset[CachedRevisionInfo]],
) -> CacheDeletionCounts:
"""Summarize deletions across repositories."""
repo_count = 0
total_revisions = 0
revisions_in_full_repos = 0
for repo, revisions in selected_by_repo.items():
total_revisions += len(revisions)
if len(revisions) == len(repo.revisions):
repo_count += 1
revisions_in_full_repos += len(revisions)
partial_revision_count = total_revisions - revisions_in_full_repos
return CacheDeletionCounts(repo_count, partial_revision_count, total_revisions)
def print_cache_selected_revisions(selected_by_repo: Mapping[CachedRepoInfo, frozenset[CachedRevisionInfo]]) -> None:
"""Pretty-print selected cache revisions during confirmation prompts."""
for repo in sorted(selected_by_repo.keys(), key=lambda repo: (repo.repo_type, repo.repo_id.lower())):
repo_key = f"{repo.repo_type}/{repo.repo_id}"
revisions = sorted(selected_by_repo[repo], key=lambda rev: rev.commit_hash)
if len(revisions) == len(repo.revisions):
print(f" - {repo_key} (entire repo)")
continue
print(f" - {repo_key}:")
for revision in revisions:
refs = " ".join(sorted(revision.refs)) or "(detached)"
print(f" {revision.commit_hash} [{refs}] {revision.size_on_disk_str}")
def build_cache_index(
hf_cache_info: HFCacheInfo,
) -> Tuple[
Dict[str, CachedRepoInfo],
Dict[str, Tuple[CachedRepoInfo, CachedRevisionInfo]],
]:
"""Create lookup tables so CLI commands can resolve repo ids and revisions quickly."""
repo_lookup: dict[str, CachedRepoInfo] = {}
revision_lookup: dict[str, tuple[CachedRepoInfo, CachedRevisionInfo]] = {}
for repo in hf_cache_info.repos:
repo_key = repo.cache_id.lower()
repo_lookup[repo_key] = repo
for revision in repo.revisions:
revision_lookup[revision.commit_hash.lower()] = (repo, revision)
return repo_lookup, revision_lookup
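# Example (added for illustration): both tables are keyed on lowercased
# strings, so callers resolve a repo via repo_lookup[target.lower()] and a
# revision via its lowercase 40-character commit hash in revision_lookup.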
def collect_cache_entries(
hf_cache_info: HFCacheInfo, *, include_revisions: bool
) -> Tuple[List[CacheEntry], RepoRefsMap]:
"""Flatten cache metadata into rows consumed by `hf cache ls`."""
entries: List[CacheEntry] = []
repo_refs_map: RepoRefsMap = {}
sorted_repos = sorted(hf_cache_info.repos, key=lambda repo: (repo.repo_type, repo.repo_id.lower()))
for repo in sorted_repos:
repo_refs_map[repo] = frozenset({ref for revision in repo.revisions for ref in revision.refs})
if include_revisions:
for revision in sorted(repo.revisions, key=lambda rev: rev.commit_hash):
entries.append((repo, revision))
else:
entries.append((repo, None))
if include_revisions:
entries.sort(
key=lambda entry: (
entry[0].cache_id,
entry[1].commit_hash if entry[1] is not None else "",
)
)
else:
entries.sort(key=lambda entry: entry[0].cache_id)
return entries, repo_refs_map
def compile_cache_filter(
expr: str, repo_refs_map: RepoRefsMap
) -> Callable[[CachedRepoInfo, Optional[CachedRevisionInfo], float], bool]:
"""Convert a `hf cache ls` filter expression into the yes/no test we apply to each cache entry before displaying it."""
match = _FILTER_PATTERN.match(expr.strip())
if not match:
raise ValueError(f"Invalid filter expression: '{expr}'.")
key = match.group("key").lower()
op = match.group("op")
value_raw = match.group("value").strip()
if op not in _ALLOWED_OPERATORS:
raise ValueError(f"Unsupported operator '{op}' in filter '{expr}'. Must be one of {list(_ALLOWED_OPERATORS)}.")
if key not in _FILTER_KEYS:
raise ValueError(f"Unsupported filter key '{key}' in '{expr}'. Must be one of {list(_FILTER_KEYS)}.")
# at this point we know that key is in `_FILTER_KEYS`
if key == "size":
size_threshold = parse_size(value_raw)
return lambda repo, revision, _: _compare_numeric(
revision.size_on_disk if revision is not None else repo.size_on_disk,
op,
size_threshold,
)
if key in {"modified", "accessed"}:
seconds = parse_duration(value_raw.strip())
def _time_filter(repo: CachedRepoInfo, revision: Optional[CachedRevisionInfo], now: float) -> bool:
timestamp = (
repo.last_accessed
if key == "accessed"
else revision.last_modified
if revision is not None
else repo.last_modified
)
if timestamp is None:
return False
return _compare_numeric(now - timestamp, op, seconds)
return _time_filter
if key == "type":
expected = value_raw.lower()
if op != "=":
raise ValueError(f"Only '=' is supported for 'type' filters. Got '{op}'.")
def _type_filter(repo: CachedRepoInfo, revision: Optional[CachedRevisionInfo], _: float) -> bool:
return repo.repo_type.lower() == expected
return _type_filter
else: # key == "refs"
if op != "=":
raise ValueError(f"Only '=' is supported for 'refs' filters. Got {op}.")
def _refs_filter(repo: CachedRepoInfo, revision: Optional[CachedRevisionInfo], _: float) -> bool:
refs = revision.refs if revision is not None else repo_refs_map.get(repo, frozenset())
return value_raw.lower() in [ref.lower() for ref in refs]
return _refs_filter
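# Example (added for illustration, hypothetical data): keep only entries whose
# repo was last accessed more than 7 days ago:
#   keep = compile_cache_filter("accessed>7d", repo_refs_map)
#   stale = [e for e in entries if keep(e[0], e[1], time.time())]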
def _build_cache_export_payload(
entries: List[CacheEntry], *, include_revisions: bool, repo_refs_map: RepoRefsMap
) -> List[Dict[str, Any]]:
"""Normalize cache entries into serializable records for JSON/CSV exports."""
payload: List[Dict[str, Any]] = []
for repo, revision in entries:
if include_revisions:
if revision is None:
continue
record: Dict[str, Any] = {
"repo_id": repo.repo_id,
"repo_type": repo.repo_type,
"revision": revision.commit_hash,
"snapshot_path": str(revision.snapshot_path),
"size_on_disk": revision.size_on_disk,
"last_accessed": repo.last_accessed,
"last_modified": revision.last_modified,
"refs": sorted(revision.refs),
}
else:
record = {
"repo_id": repo.repo_id,
"repo_type": repo.repo_type,
"size_on_disk": repo.size_on_disk,
"last_accessed": repo.last_accessed,
"last_modified": repo.last_modified,
"refs": sorted(repo_refs_map.get(repo, frozenset())),
}
payload.append(record)
return payload
def print_cache_entries_table(
entries: List[CacheEntry], *, include_revisions: bool, repo_refs_map: RepoRefsMap
) -> None:
"""Render cache entries as a table and show a human-readable summary."""
if not entries:
message = "No cached revisions found." if include_revisions else "No cached repositories found."
print(message)
return
table_rows: List[List[str]]
if include_revisions:
headers = ["ID", "REVISION", "SIZE", "LAST_MODIFIED", "REFS"]
table_rows = [
[
repo.cache_id,
revision.commit_hash,
revision.size_on_disk_str.rjust(8),
revision.last_modified_str,
" ".join(sorted(revision.refs)),
]
for repo, revision in entries
if revision is not None
]
else:
headers = ["ID", "SIZE", "LAST_ACCESSED", "LAST_MODIFIED", "REFS"]
table_rows = [
[
repo.cache_id,
repo.size_on_disk_str.rjust(8),
repo.last_accessed_str or "",
repo.last_modified_str,
" ".join(sorted(repo_refs_map.get(repo, frozenset()))),
]
for repo, _ in entries
]
print(tabulate(table_rows, headers=headers)) # type: ignore[arg-type]
unique_repos = {repo for repo, _ in entries}
repo_count = len(unique_repos)
if include_revisions:
revision_count = sum(1 for _, revision in entries if revision is not None)
total_size = sum(revision.size_on_disk for _, revision in entries if revision is not None)
else:
revision_count = sum(len(repo.revisions) for repo in unique_repos)
total_size = sum(repo.size_on_disk for repo in unique_repos)
summary = f"\nFound {repo_count} repo(s) for a total of {revision_count} revision(s) and {_format_size(total_size)} on disk."
print(ANSI.bold(summary))
def print_cache_entries_json(
entries: List[CacheEntry], *, include_revisions: bool, repo_refs_map: RepoRefsMap
) -> None:
"""Dump cache entries as JSON for scripting or automation."""
payload = _build_cache_export_payload(entries, include_revisions=include_revisions, repo_refs_map=repo_refs_map)
json.dump(payload, sys.stdout, indent=2)
sys.stdout.write("\n")
def print_cache_entries_csv(entries: List[CacheEntry], *, include_revisions: bool, repo_refs_map: RepoRefsMap) -> None:
"""Export cache entries as CSV rows with the shared payload format."""
records = _build_cache_export_payload(entries, include_revisions=include_revisions, repo_refs_map=repo_refs_map)
writer = csv.writer(sys.stdout)
if include_revisions:
headers = [
"repo_id",
"repo_type",
"revision",
"snapshot_path",
"size_on_disk",
"last_accessed",
"last_modified",
"refs",
]
else:
headers = ["repo_id", "repo_type", "size_on_disk", "last_accessed", "last_modified", "refs"]
writer.writerow(headers)
if not records:
return
for record in records:
refs = record["refs"]
if include_revisions:
row = [
record.get("repo_id", ""),
record.get("repo_type", ""),
record.get("revision", ""),
record.get("snapshot_path", ""),
record.get("size_on_disk"),
record.get("last_accessed"),
record.get("last_modified"),
" ".join(refs) if refs else "",
]
else:
row = [
record.get("repo_id", ""),
record.get("repo_type", ""),
record.get("size_on_disk"),
record.get("last_accessed"),
record.get("last_modified"),
" ".join(refs) if refs else "",
]
writer.writerow(row)
def _compare_numeric(left: Optional[float], op: str, right: float) -> bool:
"""Evaluate numeric comparisons for filters."""
if left is None:
return False
comparisons = {
"=": left == right,
"!=": left != right,
">": left > right,
"<": left < right,
">=": left >= right,
"<=": left <= right,
}
if op not in comparisons:
raise ValueError(f"Unsupported numeric comparison operator: {op}")
return comparisons[op]
def compile_cache_sort(sort_expr: str) -> tuple[Callable[[CacheEntry], tuple[Any, ...]], bool]:
"""Convert a `hf cache ls` sort expression into a key function for sorting entries.
Returns:
A tuple of (key_function, reverse_flag) where reverse_flag indicates whether
to sort in descending order (True) or ascending order (False).
"""
match = _SORT_PATTERN.match(sort_expr.strip().lower())
if not match:
raise ValueError(f"Invalid sort expression: '{sort_expr}'. Expected format: 'key' or 'key:asc' or 'key:desc'.")
key = match.group("key").lower()
explicit_order = match.group("order")
if key not in _SORT_KEYS:
raise ValueError(f"Unsupported sort key '{key}' in '{sort_expr}'. Must be one of {list(_SORT_KEYS)}.")
# Use explicit order if provided, otherwise use default for the key
order = explicit_order if explicit_order else _SORT_DEFAULT_ORDER[key]
reverse = order == "desc"
def _sort_key(entry: CacheEntry) -> tuple[Any, ...]:
repo, revision = entry
if key == "name":
# Sort by cache_id (repo type/id)
value: Any = repo.cache_id.lower()
return (value,)
if key == "size":
# Use revision size if available, otherwise repo size
value = revision.size_on_disk if revision is not None else repo.size_on_disk
return (value,)
if key == "accessed":
# For revisions, accessed is not available per-revision, use repo's last_accessed
# For repos, use repo's last_accessed
value = repo.last_accessed if repo.last_accessed is not None else 0.0
return (value,)
if key == "modified":
# Use revision's last_modified if available, otherwise repo's last_modified
if revision is not None:
value = revision.last_modified if revision.last_modified is not None else 0.0
else:
value = repo.last_modified if repo.last_modified is not None else 0.0
return (value,)
# Should never reach here due to validation above
raise ValueError(f"Unsupported sort key: {key}")
return _sort_key, reverse
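# Example (added for illustration): compile_cache_sort("size") returns
# (key_fn, True) because "size" defaults to descending (biggest first), while
# compile_cache_sort("name") returns (key_fn, False), i.e. alphabetical order.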
def _resolve_deletion_targets(hf_cache_info: HFCacheInfo, targets: list[str]) -> _DeletionResolution:
"""Resolve the deletion targets into a deletion resolution."""
repo_lookup, revision_lookup = build_cache_index(hf_cache_info)
selected: dict[CachedRepoInfo, set[CachedRevisionInfo]] = defaultdict(set)
revisions: set[str] = set()
missing: list[str] = []
for raw_target in targets:
target = raw_target.strip()
if not target:
continue
lowered = target.lower()
if re.fullmatch(r"[0-9a-fA-F]{40}", lowered):
match = revision_lookup.get(lowered)
if match is None:
missing.append(raw_target)
continue
repo, revision = match
selected[repo].add(revision)
revisions.add(revision.commit_hash)
continue
matched_repo = repo_lookup.get(lowered)
if matched_repo is None:
missing.append(raw_target)
continue
for revision in matched_repo.revisions:
selected[matched_repo].add(revision)
revisions.add(revision.commit_hash)
frozen_selected = {repo: frozenset(revs) for repo, revs in selected.items()}
return _DeletionResolution(
revisions=frozenset(revisions),
selected=frozen_selected,
missing=tuple(missing),
)
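# Example (added for illustration): a 40-character hex target is resolved as a
# single revision via revision_lookup; any other target (e.g.
# "model/bert-base-uncased", as in the `rm` help text below) is looked up as a
# repo id and selects every cached revision of that repo.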
#### Cache CLI commands
@cache_cli.command()
def ls(
cache_dir: Annotated[
Optional[str],
typer.Option(
help="Cache directory to scan (defaults to Hugging Face cache).",
),
] = None,
revisions: Annotated[
bool,
typer.Option(
help="Include revisions in the output instead of aggregated repositories.",
),
] = False,
filter: Annotated[
Optional[list[str]],
typer.Option(
"-f",
"--filter",
help="Filter entries (e.g. 'size>1GB', 'type=model', 'accessed>7d'). Can be used multiple times.",
),
] = None,
format: Annotated[
OutputFormat,
typer.Option(
help="Output format.",
),
] = OutputFormat.table,
quiet: Annotated[
bool,
typer.Option(
"-q",
"--quiet",
help="Print only IDs (repo IDs or revision hashes).",
),
] = False,
sort: Annotated[
Optional[SortOptions],
typer.Option(
help="Sort entries by key. Supported keys: 'accessed', 'modified', 'name', 'size'. "
"Append ':asc' or ':desc' to explicitly set the order (e.g., 'modified:asc'). "
"Defaults: 'accessed', 'modified', 'size' default to 'desc' (newest/biggest first); "
"'name' defaults to 'asc' (alphabetical).",
),
] = None,
limit: Annotated[
Optional[int],
typer.Option(
help="Limit the number of results returned. Returns only the top N entries after sorting.",
),
] = None,
) -> None:
"""List cached repositories or revisions."""
try:
hf_cache_info = scan_cache_dir(cache_dir)
except CacheNotFound as exc:
print(f"Cache directory not found: {str(exc.cache_dir)}")
raise typer.Exit(code=1) from exc
filters = filter or []
entries, repo_refs_map = collect_cache_entries(hf_cache_info, include_revisions=revisions)
try:
filter_fns = [compile_cache_filter(expr, repo_refs_map) for expr in filters]
except ValueError as exc:
raise typer.BadParameter(str(exc)) from exc
now = time.time()
for fn in filter_fns:
entries = [entry for entry in entries if fn(entry[0], entry[1], now)]
# Apply sorting if requested
if sort:
try:
sort_key_fn, reverse = compile_cache_sort(sort.value)
entries.sort(key=sort_key_fn, reverse=reverse)
except ValueError as exc:
raise typer.BadParameter(str(exc)) from exc
# Apply limit if requested
if limit is not None:
if limit < 0:
raise typer.BadParameter(f"Limit must be a non-negative integer, got {limit}.")
entries = entries[:limit]
if quiet:
for repo, revision in entries:
print(revision.commit_hash if revision is not None else repo.cache_id)
return
formatters = {
OutputFormat.table: print_cache_entries_table,
OutputFormat.json: print_cache_entries_json,
OutputFormat.csv: print_cache_entries_csv,
}
return formatters[format](entries, include_revisions=revisions, repo_refs_map=repo_refs_map)
@cache_cli.command()
def rm(
targets: Annotated[
list[str],
typer.Argument(
help="One or more repo IDs (e.g. model/bert-base-uncased) or revision hashes to delete.",
),
],
cache_dir: Annotated[
Optional[str],
typer.Option(
help="Cache directory to scan (defaults to Hugging Face cache).",
),
] = None,
yes: Annotated[
bool,
typer.Option(
"-y",
"--yes",
help="Skip confirmation prompt.",
),
] = False,
dry_run: Annotated[
bool,
typer.Option(
help="Preview deletions without removing anything.",
),
] = False,
) -> None:
"""Remove cached repositories or revisions."""
try:
hf_cache_info = scan_cache_dir(cache_dir)
except CacheNotFound as exc:
print(f"Cache directory not found: {str(exc.cache_dir)}")
raise typer.Exit(code=1) from exc
resolution = _resolve_deletion_targets(hf_cache_info, targets)
if resolution.missing:
print("Could not find the following targets in the cache:")
for entry in resolution.missing:
print(f" - {entry}")
if len(resolution.revisions) == 0:
print("Nothing to delete.")
raise typer.Exit(code=0)
strategy = hf_cache_info.delete_revisions(*sorted(resolution.revisions))
counts = summarize_deletions(resolution.selected)
summary_parts: list[str] = []
if counts.repo_count:
summary_parts.append(f"{counts.repo_count} repo(s)")
if counts.partial_revision_count:
summary_parts.append(f"{counts.partial_revision_count} revision(s)")
if not summary_parts:
summary_parts.append(f"{counts.total_revision_count} revision(s)")
summary_text = " and ".join(summary_parts)
print(f"About to delete {summary_text} totalling {strategy.expected_freed_size_str}.")
print_cache_selected_revisions(resolution.selected)
if dry_run:
print("Dry run: no files were deleted.")
return
if not yes and not typer.confirm("Proceed with deletion?", default=False):
print("Deletion cancelled.")
return
strategy.execute()
counts = summarize_deletions(resolution.selected)
print(
f"Deleted {counts.repo_count} repo(s) and {counts.total_revision_count} revision(s); freed {strategy.expected_freed_size_str}."
)
@cache_cli.command()
def prune(
cache_dir: Annotated[
Optional[str],
typer.Option(
help="Cache directory to scan (defaults to Hugging Face cache).",
),
] = None,
yes: Annotated[
bool,
typer.Option(
"-y",
"--yes",
help="Skip confirmation prompt.",
),
] = False,
dry_run: Annotated[
bool,
typer.Option(
help="Preview deletions without removing anything.",
),
] = False,
) -> None:
"""Remove detached revisions from the cache."""
try:
hf_cache_info = scan_cache_dir(cache_dir)
except CacheNotFound as exc:
print(f"Cache directory not found: {str(exc.cache_dir)}")
raise typer.Exit(code=1) from exc
selected: dict[CachedRepoInfo, frozenset[CachedRevisionInfo]] = {}
revisions: set[str] = set()
for repo in hf_cache_info.repos:
detached = frozenset(revision for revision in repo.revisions if len(revision.refs) == 0)
if not detached:
continue
selected[repo] = detached
revisions.update(revision.commit_hash for revision in detached)
if len(revisions) == 0:
print("No unreferenced revisions found. Nothing to prune.")
return
resolution = _DeletionResolution(
revisions=frozenset(revisions),
selected=selected,
missing=(),
)
strategy = hf_cache_info.delete_revisions(*sorted(resolution.revisions))
counts = summarize_deletions(selected)
print(
f"About to delete {counts.total_revision_count} unreferenced revision(s) ({strategy.expected_freed_size_str} total)."
)
print_cache_selected_revisions(selected)
if dry_run:
print("Dry run: no files were deleted.")
return
if not yes and not typer.confirm("Proceed?"):
print("Pruning cancelled.")
return
strategy.execute()
print(f"Deleted {counts.total_revision_count} unreferenced revision(s); freed {strategy.expected_freed_size_str}.")
@cache_cli.command()
def verify(
repo_id: RepoIdArg,
repo_type: RepoTypeOpt = RepoTypeOpt.model,
revision: RevisionOpt = None,
cache_dir: Annotated[
Optional[str],
typer.Option(
help="Cache directory to use when verifying files from cache (defaults to Hugging Face cache).",
),
] = None,
local_dir: Annotated[
Optional[str],
typer.Option(
help="If set, verify files under this directory instead of the cache.",
),
] = None,
fail_on_missing_files: Annotated[
bool,
typer.Option(
"--fail-on-missing-files",
help="Fail if some files exist on the remote but are missing locally.",
),
] = False,
fail_on_extra_files: Annotated[
bool,
typer.Option(
"--fail-on-extra-files",
help="Fail if some files exist locally but are not present on the remote revision.",
),
] = False,
token: TokenOpt = None,
) -> None:
"""Verify checksums for a single repo revision from cache or a local directory.
Examples:
- Verify main revision in cache: `hf cache verify gpt2`
- Verify specific revision: `hf cache verify gpt2 --revision refs/pr/1`
- Verify dataset: `hf cache verify karpathy/fineweb-edu-100b-shuffle --repo-type dataset`
- Verify local dir: `hf cache verify deepseek-ai/DeepSeek-OCR --local-dir /path/to/repo`
"""
if local_dir is not None and cache_dir is not None:
print("Cannot pass both --local-dir and --cache-dir. Use one or the other.")
raise typer.Exit(code=2)
api = get_hf_api(token=token)
result = api.verify_repo_checksums(
repo_id=repo_id,
repo_type=repo_type.value if hasattr(repo_type, "value") else str(repo_type),
revision=revision,
local_dir=local_dir,
cache_dir=cache_dir,
token=token,
)
exit_code = 0
has_mismatches = bool(result.mismatches)
if has_mismatches:
print("❌ Checksum verification failed for the following file(s):")
for m in result.mismatches:
print(f" - {m['path']}: expected {m['expected']} ({m['algorithm']}), got {m['actual']}")
exit_code = 1
if result.missing_paths:
if fail_on_missing_files:
print("Missing files (present remotely, absent locally):")
for p in result.missing_paths:
print(f" - {p}")
exit_code = 1
else:
warning = (
f"{len(result.missing_paths)} remote file(s) are missing locally. "
"Use --fail-on-missing-files for details."
)
print(f"⚠️ {warning}")
if result.extra_paths:
if fail_on_extra_files:
print("Extra files (present locally, absent remotely):")
for p in result.extra_paths:
print(f" - {p}")
exit_code = 1
else:
warning = (
f"{len(result.extra_paths)} local file(s) do not exist on the remote repo. "
"Use --fail-on-extra-files for details."
)
print(f"⚠️ {warning}")
verified_location = result.verified_path
if exit_code != 0:
print(f"❌ Verification failed for '{repo_id}' ({repo_type.value}) in {verified_location}.")
print(f" Revision: {result.revision}")
raise typer.Exit(code=exit_code)
print(f"✅ Verified {result.checked_count} file(s) for '{repo_id}' ({repo_type.value}) in {verified_location}")
print(" All checksums match.")


@@ -0,0 +1,189 @@
# coding=utf-8
# Copyright 2025-present, the HuggingFace Inc. team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Contains command to download files from the Hub with the CLI.
Usage:
hf download --help
# Download file
hf download gpt2 config.json
# Download entire repo
hf download fffiloni/zeroscope --repo-type=space --revision=refs/pr/78
# Download repo with filters
hf download gpt2 --include="*.safetensors"
# Download with token
hf download Wauplin/private-model --token=hf_***
# Download quietly (no progress bar, no warnings, only the returned path)
hf download gpt2 config.json --quiet
# Download to local dir
hf download gpt2 --local-dir=./models/gpt2
"""
import warnings
from typing import Annotated, Optional, Union
import typer
from huggingface_hub import logging
from huggingface_hub._snapshot_download import snapshot_download
from huggingface_hub.file_download import DryRunFileInfo, hf_hub_download
from huggingface_hub.utils import _format_size, disable_progress_bars, enable_progress_bars, tabulate
from ._cli_utils import RepoIdArg, RepoTypeOpt, RevisionOpt, TokenOpt
logger = logging.get_logger(__name__)
def download(
repo_id: RepoIdArg,
filenames: Annotated[
Optional[list[str]],
typer.Argument(
help="Files to download (e.g. `config.json`, `data/metadata.jsonl`).",
),
] = None,
repo_type: RepoTypeOpt = RepoTypeOpt.model,
revision: RevisionOpt = None,
include: Annotated[
Optional[list[str]],
typer.Option(
help="Glob patterns to include from files to download. eg: *.json",
),
] = None,
exclude: Annotated[
Optional[list[str]],
typer.Option(
help="Glob patterns to exclude from files to download.",
),
] = None,
cache_dir: Annotated[
Optional[str],
typer.Option(
help="Directory where to save files.",
),
] = None,
local_dir: Annotated[
Optional[str],
typer.Option(
help="If set, the downloaded file will be placed under this directory. Check out https://huggingface.co/docs/huggingface_hub/guides/download#download-files-to-local-folder for more details.",
),
] = None,
force_download: Annotated[
bool,
typer.Option(
help="If True, the files will be downloaded even if they are already cached.",
),
] = False,
dry_run: Annotated[
bool,
typer.Option(
help="If True, perform a dry run without actually downloading the file.",
),
] = False,
token: TokenOpt = None,
quiet: Annotated[
bool,
typer.Option(
help="If True, progress bars are disabled and only the path to the download files is printed.",
),
] = False,
max_workers: Annotated[
int,
typer.Option(
help="Maximum number of workers to use for downloading files. Default is 8.",
),
] = 8,
) -> None:
"""Download files from the Hub."""
def run_download() -> Union[str, DryRunFileInfo, list[DryRunFileInfo]]:
filenames_list = filenames if filenames is not None else []
# Warn user if patterns are ignored
if len(filenames_list) > 0:
if include is not None and len(include) > 0:
warnings.warn("Ignoring `--include` since filenames have being explicitly set.")
if exclude is not None and len(exclude) > 0:
warnings.warn("Ignoring `--exclude` since filenames have being explicitly set.")
# Single file to download: use `hf_hub_download`
if len(filenames_list) == 1:
return hf_hub_download(
repo_id=repo_id,
repo_type=repo_type.value,
revision=revision,
filename=filenames_list[0],
cache_dir=cache_dir,
force_download=force_download,
token=token,
local_dir=local_dir,
library_name="huggingface-cli",
dry_run=dry_run,
)
# Otherwise: use `snapshot_download` to ensure all files come from the same revision
if len(filenames_list) == 0:
allow_patterns = include
ignore_patterns = exclude
else:
allow_patterns = filenames_list
ignore_patterns = None
return snapshot_download(
repo_id=repo_id,
repo_type=repo_type.value,
revision=revision,
allow_patterns=allow_patterns,
ignore_patterns=ignore_patterns,
force_download=force_download,
cache_dir=cache_dir,
token=token,
local_dir=local_dir,
library_name="huggingface-cli",
max_workers=max_workers,
dry_run=dry_run,
)
def _print_result(result: Union[str, DryRunFileInfo, list[DryRunFileInfo]]) -> None:
if isinstance(result, str):
print(result)
return
# Print dry run info
if isinstance(result, DryRunFileInfo):
result = [result]
print(
f"[dry-run] Will download {len([r for r in result if r.will_download])} files (out of {len(result)}) totalling {_format_size(sum(r.file_size for r in result if r.will_download))}."
)
columns = ["File", "Bytes to download"]
items: list[list[Union[str, int]]] = []
for info in sorted(result, key=lambda x: x.filename):
items.append([info.filename, _format_size(info.file_size) if info.will_download else "-"])
print(tabulate(items, headers=columns))
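# Illustrative dry-run output (hypothetical repo and sizes): a summary line
# like "[dry-run] Will download 2 files (out of 3) totalling 1.2M." followed
# by a two-column table listing each file and its size, or "-" for files that
# will be skipped.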
if quiet:
disable_progress_bars()
with warnings.catch_warnings():
warnings.simplefilter("ignore")
_print_result(run_download())
enable_progress_bars()
else:
_print_result(run_download())
logging.set_verbosity_warning()


@@ -0,0 +1,60 @@
# Copyright 2020 The HuggingFace Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from huggingface_hub.cli._cli_utils import check_cli_update, typer_factory
from huggingface_hub.cli.auth import auth_cli
from huggingface_hub.cli.cache import cache_cli
from huggingface_hub.cli.download import download
from huggingface_hub.cli.inference_endpoints import ie_cli
from huggingface_hub.cli.jobs import jobs_cli
from huggingface_hub.cli.lfs import lfs_enable_largefiles, lfs_multipart_upload
from huggingface_hub.cli.repo import repo_cli
from huggingface_hub.cli.repo_files import repo_files_cli
from huggingface_hub.cli.system import env, version
from huggingface_hub.cli.upload import upload
from huggingface_hub.cli.upload_large_folder import upload_large_folder
from huggingface_hub.utils import logging
app = typer_factory(help="Hugging Face Hub CLI")
# top level single commands (defined in their respective files)
app.command(help="Download files from the Hub.")(download)
app.command(help="Upload a file or a folder to the Hub.")(upload)
app.command(help="Upload a large folder to the Hub. Recommended for resumable uploads.")(upload_large_folder)
app.command(name="env", help="Print information about the environment.")(env)
app.command(help="Print information about the hf version.")(version)
app.command(help="Configure your repository to enable upload of files > 5GB.", hidden=True)(lfs_enable_largefiles)
app.command(help="Upload large files to the Hub.", hidden=True)(lfs_multipart_upload)
# command groups
app.add_typer(auth_cli, name="auth")
app.add_typer(cache_cli, name="cache")
app.add_typer(repo_cli, name="repo")
app.add_typer(repo_files_cli, name="repo-files")
app.add_typer(jobs_cli, name="jobs")
app.add_typer(ie_cli, name="endpoints")
def main():
logging.set_verbosity_info()
check_cli_update()
app()
if __name__ == "__main__":
main()


@@ -0,0 +1,377 @@
"""CLI commands for Hugging Face Inference Endpoints."""
import json
from typing import Annotated, Optional
import typer
from huggingface_hub._inference_endpoints import InferenceEndpoint
from huggingface_hub.errors import HfHubHTTPError
from ._cli_utils import TokenOpt, get_hf_api, typer_factory
ie_cli = typer_factory(help="Manage Hugging Face Inference Endpoints.")
catalog_app = typer_factory(help="Interact with the Inference Endpoints catalog.")
NameArg = Annotated[
str,
typer.Argument(help="Endpoint name."),
]
NameOpt = Annotated[
Optional[str],
typer.Option(help="Endpoint name."),
]
NamespaceOpt = Annotated[
Optional[str],
typer.Option(
help="The namespace associated with the Inference Endpoint. Defaults to the current user's namespace.",
),
]
def _print_endpoint(endpoint: InferenceEndpoint) -> None:
typer.echo(json.dumps(endpoint.raw, indent=2, sort_keys=True))
@ie_cli.command()
def ls(
namespace: NamespaceOpt = None,
token: TokenOpt = None,
) -> None:
"""Lists all Inference Endpoints for the given namespace."""
api = get_hf_api(token=token)
try:
endpoints = api.list_inference_endpoints(namespace=namespace, token=token)
except HfHubHTTPError as error:
typer.echo(f"Listing failed: {error}")
raise typer.Exit(code=error.response.status_code) from error
typer.echo(
json.dumps(
{"items": [endpoint.raw for endpoint in endpoints]},
indent=2,
sort_keys=True,
)
)
@ie_cli.command(name="deploy")
def deploy(
name: NameArg,
repo: Annotated[
str,
typer.Option(
help="The name of the model repository associated with the Inference Endpoint (e.g. 'openai/gpt-oss-120b').",
),
],
framework: Annotated[
str,
typer.Option(
help="The machine learning framework used for the model (e.g. 'vllm').",
),
],
accelerator: Annotated[
str,
typer.Option(
help="The hardware accelerator to be used for inference (e.g. 'cpu').",
),
],
instance_size: Annotated[
str,
typer.Option(
help="The size or type of the instance to be used for hosting the model (e.g. 'x4').",
),
],
instance_type: Annotated[
str,
typer.Option(
help="The cloud instance type where the Inference Endpoint will be deployed (e.g. 'intel-icl').",
),
],
region: Annotated[
str,
typer.Option(
help="The cloud region in which the Inference Endpoint will be created (e.g. 'us-east-1').",
),
],
vendor: Annotated[
str,
typer.Option(
help="The cloud provider or vendor where the Inference Endpoint will be hosted (e.g. 'aws').",
),
],
*,
namespace: NamespaceOpt = None,
task: Annotated[
Optional[str],
typer.Option(
help="The task on which to deploy the model (e.g. 'text-classification').",
),
] = None,
token: TokenOpt = None,
) -> None:
"""Deploy an Inference Endpoint from a Hub repository."""
api = get_hf_api(token=token)
endpoint = api.create_inference_endpoint(
name=name,
repository=repo,
framework=framework,
accelerator=accelerator,
instance_size=instance_size,
instance_type=instance_type,
region=region,
vendor=vendor,
namespace=namespace,
task=task,
token=token,
)
_print_endpoint(endpoint)
@catalog_app.command(name="deploy")
def deploy_from_catalog(
repo: Annotated[
str,
typer.Option(
help="The name of the model repository associated with the Inference Endpoint (e.g. 'openai/gpt-oss-120b').",
),
],
name: NameOpt = None,
namespace: NamespaceOpt = None,
token: TokenOpt = None,
) -> None:
"""Deploy an Inference Endpoint from the Model Catalog."""
api = get_hf_api(token=token)
try:
endpoint = api.create_inference_endpoint_from_catalog(
repo_id=repo,
name=name,
namespace=namespace,
token=token,
)
except HfHubHTTPError as error:
typer.echo(f"Deployment failed: {error}")
raise typer.Exit(code=error.response.status_code) from error
_print_endpoint(endpoint)
def list_catalog(
token: TokenOpt = None,
) -> None:
"""List available Catalog models."""
api = get_hf_api(token=token)
try:
models = api.list_inference_catalog(token=token)
except HfHubHTTPError as error:
typer.echo(f"Catalog fetch failed: {error}")
raise typer.Exit(code=error.response.status_code) from error
typer.echo(json.dumps({"models": models}, indent=2, sort_keys=True))
catalog_app.command(name="ls")(list_catalog)
ie_cli.command(name="list-catalog", help="List available Catalog models.", hidden=True)(list_catalog)
ie_cli.add_typer(catalog_app, name="catalog")
@ie_cli.command()
def describe(
name: NameArg,
namespace: NamespaceOpt = None,
token: TokenOpt = None,
) -> None:
"""Get information about an existing endpoint."""
api = get_hf_api(token=token)
try:
endpoint = api.get_inference_endpoint(name=name, namespace=namespace, token=token)
except HfHubHTTPError as error:
typer.echo(f"Fetch failed: {error}")
raise typer.Exit(code=error.response.status_code) from error
_print_endpoint(endpoint)
@ie_cli.command()
def update(
name: NameArg,
namespace: NamespaceOpt = None,
repo: Annotated[
Optional[str],
typer.Option(
help="The name of the model repository associated with the Inference Endpoint (e.g. 'openai/gpt-oss-120b').",
),
] = None,
accelerator: Annotated[
Optional[str],
typer.Option(
help="The hardware accelerator to be used for inference (e.g. 'cpu').",
),
] = None,
instance_size: Annotated[
Optional[str],
typer.Option(
help="The size or type of the instance to be used for hosting the model (e.g. 'x4').",
),
] = None,
instance_type: Annotated[
Optional[str],
typer.Option(
help="The cloud instance type where the Inference Endpoint will be deployed (e.g. 'intel-icl').",
),
] = None,
framework: Annotated[
Optional[str],
typer.Option(
help="The machine learning framework used for the model (e.g. 'custom').",
),
] = None,
revision: Annotated[
Optional[str],
typer.Option(
help="The specific model revision to deploy on the Inference Endpoint (e.g. '6c0e6080953db56375760c0471a8c5f2929baf11').",
),
] = None,
task: Annotated[
Optional[str],
typer.Option(
help="The task on which to deploy the model (e.g. 'text-classification').",
),
] = None,
min_replica: Annotated[
Optional[int],
typer.Option(
help="The minimum number of replicas (instances) to keep running for the Inference Endpoint.",
),
] = None,
max_replica: Annotated[
Optional[int],
typer.Option(
help="The maximum number of replicas (instances) to scale to for the Inference Endpoint.",
),
] = None,
scale_to_zero_timeout: Annotated[
Optional[int],
typer.Option(
help="The duration in minutes before an inactive endpoint is scaled to zero.",
),
] = None,
token: TokenOpt = None,
) -> None:
"""Update an existing endpoint."""
api = get_hf_api(token=token)
try:
endpoint = api.update_inference_endpoint(
name=name,
namespace=namespace,
repository=repo,
framework=framework,
revision=revision,
task=task,
accelerator=accelerator,
instance_size=instance_size,
instance_type=instance_type,
min_replica=min_replica,
max_replica=max_replica,
scale_to_zero_timeout=scale_to_zero_timeout,
token=token,
)
except HfHubHTTPError as error:
typer.echo(f"Update failed: {error}")
raise typer.Exit(code=error.response.status_code) from error
_print_endpoint(endpoint)
@ie_cli.command()
def delete(
name: NameArg,
namespace: NamespaceOpt = None,
yes: Annotated[
bool,
typer.Option("--yes", help="Skip confirmation prompts."),
] = False,
token: TokenOpt = None,
) -> None:
"""Delete an Inference Endpoint permanently."""
if not yes:
confirmation = typer.prompt(f"Delete endpoint '{name}'? Type the name to confirm.")
if confirmation != name:
typer.echo("Aborted.")
raise typer.Exit(code=2)
api = get_hf_api(token=token)
try:
api.delete_inference_endpoint(name=name, namespace=namespace, token=token)
except HfHubHTTPError as error:
typer.echo(f"Delete failed: {error}")
raise typer.Exit(code=error.response.status_code) from error
typer.echo(f"Deleted '{name}'.")
@ie_cli.command()
def pause(
name: NameArg,
namespace: NamespaceOpt = None,
token: TokenOpt = None,
) -> None:
"""Pause an Inference Endpoint."""
api = get_hf_api(token=token)
try:
endpoint = api.pause_inference_endpoint(name=name, namespace=namespace, token=token)
except HfHubHTTPError as error:
typer.echo(f"Pause failed: {error}")
raise typer.Exit(code=error.response.status_code) from error
_print_endpoint(endpoint)
@ie_cli.command()
def resume(
name: NameArg,
namespace: NamespaceOpt = None,
fail_if_already_running: Annotated[
bool,
typer.Option(
"--fail-if-already-running",
help="If `True`, the method will raise an error if the Inference Endpoint is already running.",
),
] = False,
token: TokenOpt = None,
) -> None:
"""Resume an Inference Endpoint."""
api = get_hf_api(token=token)
try:
endpoint = api.resume_inference_endpoint(
name=name,
namespace=namespace,
token=token,
running_ok=not fail_if_already_running,
)
except HfHubHTTPError as error:
typer.echo(f"Resume failed: {error}")
raise typer.Exit(code=error.response.status_code) from error
_print_endpoint(endpoint)
@ie_cli.command()
def scale_to_zero(
name: NameArg,
namespace: NamespaceOpt = None,
token: TokenOpt = None,
) -> None:
"""Scale an Inference Endpoint to zero."""
api = get_hf_api(token=token)
try:
endpoint = api.scale_to_zero_inference_endpoint(name=name, namespace=namespace, token=token)
except HfHubHTTPError as error:
typer.echo(f"Scale To Zero failed: {error}")
raise typer.Exit(code=error.response.status_code) from error
_print_endpoint(endpoint)


@@ -0,0 +1,772 @@
# Copyright 2025 The HuggingFace Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Contains commands to interact with jobs on the Hugging Face Hub.
Usage:
# run a job
hf jobs run <image> <command>
# List running or completed jobs
hf jobs ps [-a] [-f key=value] [--format TEMPLATE]
# Stream logs from a job
hf jobs logs <job-id>
# Inspect detailed information about a job
hf jobs inspect <job-id>
# Cancel a running job
hf jobs cancel <job-id>
# Run a UV script
hf jobs uv run <script>
# Schedule a job
hf jobs scheduled run <schedule> <image> <command>
# List scheduled jobs
hf jobs scheduled ps [-a] [-f key=value] [--format TEMPLATE]
# Inspect a scheduled job
hf jobs scheduled inspect <scheduled_job_id>
# Suspend a scheduled job
hf jobs scheduled suspend <scheduled_job_id>
# Resume a scheduled job
hf jobs scheduled resume <scheduled_job_id>
# Delete a scheduled job
hf jobs scheduled delete <scheduled_job_id>
"""
import json
import os
import re
from dataclasses import asdict
from pathlib import Path
from typing import Annotated, Dict, Optional, Union
import typer
from huggingface_hub import SpaceHardware, get_token
from huggingface_hub.errors import HfHubHTTPError
from huggingface_hub.utils import logging, tabulate
from huggingface_hub.utils._dotenv import load_dotenv
from ._cli_utils import TokenOpt, get_hf_api, typer_factory
logger = logging.get_logger(__name__)
SUGGESTED_FLAVORS = [item.value for item in SpaceHardware if item.value != "zero-a10g"]
# Common job-related options
ImageArg = Annotated[
str,
typer.Argument(
help="The Docker image to use.",
),
]
ImageOpt = Annotated[
Optional[str],
typer.Option(
help="Use a custom Docker image with `uv` installed.",
),
]
FlavorOpt = Annotated[
Optional[SpaceHardware],
typer.Option(
help=f"Flavor for the hardware, as in HF Spaces. Defaults to `cpu-basic`. Possible values: {', '.join(SUGGESTED_FLAVORS)}.",
),
]
EnvOpt = Annotated[
Optional[list[str]],
typer.Option(
"-e",
"--env",
help="Set environment variables. E.g. --env ENV=value",
),
]
SecretsOpt = Annotated[
Optional[list[str]],
typer.Option(
"-s",
"--secrets",
help="Set secret environment variables. E.g. --secrets SECRET=value or `--secrets HF_TOKEN` to pass your Hugging Face token.",
),
]
EnvFileOpt = Annotated[
Optional[str],
typer.Option(
"--env-file",
help="Read in a file of environment variables.",
),
]
SecretsFileOpt = Annotated[
Optional[str],
typer.Option(
help="Read in a file of secret environment variables.",
),
]
TimeoutOpt = Annotated[
Optional[str],
typer.Option(
help="Max duration: int/float with s (seconds, default), m (minutes), h (hours) or d (days).",
),
]
DetachOpt = Annotated[
bool,
typer.Option(
"-d",
"--detach",
help="Run the Job in the background and print the Job ID.",
),
]
NamespaceOpt = Annotated[
Optional[str],
typer.Option(
help="The namespace where the job will be running. Defaults to the current user's namespace.",
),
]
WithOpt = Annotated[
Optional[list[str]],
typer.Option(
"--with",
help="Run with the given packages installed",
),
]
PythonOpt = Annotated[
Optional[str],
typer.Option(
"-p",
"--python",
help="The Python interpreter to use for the run environment",
),
]
SuspendOpt = Annotated[
Optional[bool],
typer.Option(
help="Suspend (pause) the scheduled Job",
),
]
ConcurrencyOpt = Annotated[
Optional[bool],
typer.Option(
help="Allow multiple instances of this Job to run concurrently",
),
]
ScheduleArg = Annotated[
str,
typer.Argument(
help="One of annually, yearly, monthly, weekly, daily, hourly, or a CRON schedule expression.",
),
]
ScriptArg = Annotated[
str,
typer.Argument(
help="UV script to run (local file or URL)",
),
]
ScriptArgsArg = Annotated[
Optional[list[str]],
typer.Argument(
help="Arguments for the script",
),
]
CommandArg = Annotated[
list[str],
typer.Argument(
help="The command to run.",
),
]
JobIdArg = Annotated[
str,
typer.Argument(
help="Job ID",
),
]
ScheduledJobIdArg = Annotated[
str,
typer.Argument(
help="Scheduled Job ID",
),
]
RepoOpt = Annotated[
Optional[str],
typer.Option(
help="Repository name for the script (creates ephemeral if not specified)",
),
]
jobs_cli = typer_factory(help="Run and manage Jobs on the Hub.")
@jobs_cli.command("run", help="Run a Job")
def jobs_run(
image: ImageArg,
command: CommandArg,
env: EnvOpt = None,
secrets: SecretsOpt = None,
env_file: EnvFileOpt = None,
secrets_file: SecretsFileOpt = None,
flavor: FlavorOpt = None,
timeout: TimeoutOpt = None,
detach: DetachOpt = False,
namespace: NamespaceOpt = None,
token: TokenOpt = None,
) -> None:
env_map: dict[str, Optional[str]] = {}
if env_file:
env_map.update(load_dotenv(Path(env_file).read_text(), environ=os.environ.copy()))
for env_value in env or []:
env_map.update(load_dotenv(env_value, environ=os.environ.copy()))
secrets_map: dict[str, Optional[str]] = {}
extended_environ = _get_extended_environ()
if secrets_file:
secrets_map.update(load_dotenv(Path(secrets_file).read_text(), environ=extended_environ))
for secret in secrets or []:
secrets_map.update(load_dotenv(secret, environ=extended_environ))
api = get_hf_api(token=token)
job = api.run_job(
image=image,
command=command,
env=env_map,
secrets=secrets_map,
flavor=flavor,
timeout=timeout,
namespace=namespace,
)
# Always print the job ID to the user
print(f"Job started with ID: {job.id}")
print(f"View at: {job.url}")
if detach:
return
# Now let's stream the logs
for log in api.fetch_job_logs(job_id=job.id):
print(log)
@jobs_cli.command("logs", help="Fetch the logs of a Job")
def jobs_logs(
job_id: JobIdArg,
namespace: NamespaceOpt = None,
token: TokenOpt = None,
) -> None:
api = get_hf_api(token=token)
for log in api.fetch_job_logs(job_id=job_id, namespace=namespace):
print(log)
def _matches_filters(job_properties: dict[str, str], filters: dict[str, str]) -> bool:
"""Check if scheduled job matches all specified filters."""
for key, pattern in filters.items():
# Check if property exists
if key not in job_properties:
return False
# Support pattern matching with wildcards
if "*" in pattern or "?" in pattern:
# Convert glob pattern to regex
regex_pattern = pattern.replace("*", ".*").replace("?", ".")
if not re.search(f"^{regex_pattern}$", job_properties[key], re.IGNORECASE):
return False
# Simple substring matching
elif pattern.lower() not in job_properties[key].lower():
return False
return True
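# Illustrative behavior, following the matching rules above:
#   _matches_filters({"status": "running"}, {"status": "run*"})  -> True   (glob match)
#   _matches_filters({"status": "running"}, {"status": "RUN"})   -> True   (substring, case-insensitive)
#   _matches_filters({"status": "running"}, {"image": "ubuntu"}) -> False  (missing property)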
def _print_output(rows: list[list[Union[str, int]]], headers: list[str], fmt: Optional[str]) -> None:
"""Print output according to the chosen format."""
if fmt:
# Use custom template if provided
template = fmt
for row in rows:
line = template
for i, field in enumerate(["id", "image", "command", "created", "status"]):
placeholder = f"{{{{.{field}}}}}"
if placeholder in line:
line = line.replace(placeholder, str(row[i]))
print(line)
else:
# Default tabular format
print(_tabulate(rows, headers=headers))
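# Illustrative: the template substitutes {{.id}}, {{.image}}, {{.command}},
# {{.created}} and {{.status}} with the first five fields of each row, e.g.
#   hf jobs ps --format "{{.id}}: {{.status}}"
# prints one "<job id>: <status>" line per job.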
@jobs_cli.command("ps", help="List Jobs")
def jobs_ps(
all: Annotated[
bool,
typer.Option(
"-a",
"--all",
help="Show all Jobs (default shows just running)",
),
] = False,
namespace: NamespaceOpt = None,
token: TokenOpt = None,
filter: Annotated[
Optional[list[str]],
typer.Option(
"-f",
"--filter",
help="Filter output based on conditions provided (format: key=value)",
),
] = None,
format: Annotated[
Optional[str],
typer.Option(
help="Format output using a custom template",
),
] = None,
) -> None:
try:
api = get_hf_api(token=token)
# Fetch jobs data
jobs = api.list_jobs(namespace=namespace)
# Define table headers
table_headers = ["JOB ID", "IMAGE/SPACE", "COMMAND", "CREATED", "STATUS"]
rows: list[list[Union[str, int]]] = []
filters: dict[str, str] = {}
for f in filter or []:
if "=" in f:
key, value = f.split("=", 1)
filters[key.lower()] = value
else:
print(f"Warning: Ignoring invalid filter format '{f}'. Use key=value format.")
# Process jobs data
for job in jobs:
# Extract job data for filtering
status = job.status.stage if job.status else "UNKNOWN"
if not all and status not in ("RUNNING", "UPDATING"):
# Skip job if not all jobs should be shown and status doesn't match criteria
continue
# Extract job data for output
job_id = job.id
# Extract image or space information
image_or_space = job.docker_image or "N/A"
# Extract and format command
cmd = job.command or []
command_str = " ".join(cmd) if cmd else "N/A"
# Extract creation time
created_at = job.created_at.strftime("%Y-%m-%d %H:%M:%S") if job.created_at else "N/A"
# Create a dict with all job properties for filtering
props = {"id": job_id, "image": image_or_space, "status": status.lower(), "command": command_str}
if not _matches_filters(props, filters):
continue
# Create row
rows.append([job_id, image_or_space, command_str, created_at, status])
# Handle empty results
if not rows:
filters_msg = (
f" matching filters: {', '.join([f'{k}={v}' for k, v in filters.items()])}" if filters else ""
)
print(f"No jobs found{filters_msg}")
return
# Apply custom format if provided or use default tabular format
_print_output(rows, table_headers, format)
except HfHubHTTPError as e:
print(f"Error fetching jobs data: {e}")
except (KeyError, ValueError, TypeError) as e:
print(f"Error processing jobs data: {e}")
except Exception as e:
print(f"Unexpected error - {type(e).__name__}: {e}")
@jobs_cli.command("inspect", help="Display detailed information on one or more Jobs")
def jobs_inspect(
job_ids: Annotated[
list[str],
typer.Argument(
help="The jobs to inspect",
),
],
namespace: NamespaceOpt = None,
token: TokenOpt = None,
) -> None:
api = get_hf_api(token=token)
jobs = [api.inspect_job(job_id=job_id, namespace=namespace) for job_id in job_ids]
print(json.dumps([asdict(job) for job in jobs], indent=4, default=str))
@jobs_cli.command("cancel", help="Cancel a Job")
def jobs_cancel(
job_id: JobIdArg,
namespace: NamespaceOpt = None,
token: TokenOpt = None,
) -> None:
api = get_hf_api(token=token)
api.cancel_job(job_id=job_id, namespace=namespace)
uv_app = typer_factory(help="Run UV scripts (Python with inline dependencies) on HF infrastructure")
jobs_cli.add_typer(uv_app, name="uv")
@uv_app.command("run", help="Run a UV script (local file or URL) on HF infrastructure")
def jobs_uv_run(
script: ScriptArg,
script_args: ScriptArgsArg = None,
image: ImageOpt = None,
repo: RepoOpt = None,
flavor: FlavorOpt = None,
env: EnvOpt = None,
secrets: SecretsOpt = None,
env_file: EnvFileOpt = None,
secrets_file: SecretsFileOpt = None,
timeout: TimeoutOpt = None,
detach: DetachOpt = False,
namespace: NamespaceOpt = None,
token: TokenOpt = None,
with_: WithOpt = None,
python: PythonOpt = None,
) -> None:
env_map: dict[str, Optional[str]] = {}
if env_file:
env_map.update(load_dotenv(Path(env_file).read_text(), environ=os.environ.copy()))
for env_value in env or []:
env_map.update(load_dotenv(env_value, environ=os.environ.copy()))
secrets_map: dict[str, Optional[str]] = {}
extended_environ = _get_extended_environ()
if secrets_file:
secrets_map.update(load_dotenv(Path(secrets_file).read_text(), environ=extended_environ))
for secret in secrets or []:
secrets_map.update(load_dotenv(secret, environ=extended_environ))
api = get_hf_api(token=token)
job = api.run_uv_job(
script=script,
script_args=script_args or [],
dependencies=with_,
python=python,
image=image,
env=env_map,
secrets=secrets_map,
flavor=flavor, # type: ignore[arg-type]
timeout=timeout,
namespace=namespace,
_repo=repo,
)
# Always print the job ID to the user
print(f"Job started with ID: {job.id}")
print(f"View at: {job.url}")
if detach:
return
# Now let's stream the logs
for log in api.fetch_job_logs(job_id=job.id):
print(log)
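# Example usage (illustrative; script names and dependencies are placeholders):
#   hf jobs uv run my_script.py --with pandas --python 3.12
#   hf jobs uv run process.py input.csv output.csv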
scheduled_app = typer_factory(help="Create and manage scheduled Jobs on the Hub.")
jobs_cli.add_typer(scheduled_app, name="scheduled")
@scheduled_app.command("run", help="Schedule a Job")
def scheduled_run(
schedule: ScheduleArg,
image: ImageArg,
command: CommandArg,
suspend: SuspendOpt = None,
concurrency: ConcurrencyOpt = None,
env: EnvOpt = None,
secrets: SecretsOpt = None,
env_file: EnvFileOpt = None,
secrets_file: SecretsFileOpt = None,
flavor: FlavorOpt = None,
timeout: TimeoutOpt = None,
namespace: NamespaceOpt = None,
token: TokenOpt = None,
) -> None:
env_map: dict[str, Optional[str]] = {}
if env_file:
env_map.update(load_dotenv(Path(env_file).read_text(), environ=os.environ.copy()))
for env_value in env or []:
env_map.update(load_dotenv(env_value, environ=os.environ.copy()))
secrets_map: dict[str, Optional[str]] = {}
extended_environ = _get_extended_environ()
if secrets_file:
secrets_map.update(load_dotenv(Path(secrets_file).read_text(), environ=extended_environ))
for secret in secrets or []:
secrets_map.update(load_dotenv(secret, environ=extended_environ))
api = get_hf_api(token=token)
scheduled_job = api.create_scheduled_job(
image=image,
command=command,
schedule=schedule,
suspend=suspend,
concurrency=concurrency,
env=env_map,
secrets=secrets_map,
flavor=flavor,
timeout=timeout,
namespace=namespace,
)
print(f"Scheduled Job created with ID: {scheduled_job.id}")
@scheduled_app.command("ps", help="List scheduled Jobs")
def scheduled_ps(
all: Annotated[
bool,
typer.Option(
"-a",
"--all",
help="Show all scheduled Jobs (default hides suspended)",
),
] = False,
namespace: NamespaceOpt = None,
token: TokenOpt = None,
filter: Annotated[
Optional[list[str]],
typer.Option(
"-f",
"--filter",
help="Filter output based on conditions provided (format: key=value)",
),
] = None,
format: Annotated[
Optional[str],
typer.Option(
"--format",
help="Format output using a custom template",
),
] = None,
) -> None:
try:
api = get_hf_api(token=token)
scheduled_jobs = api.list_scheduled_jobs(namespace=namespace)
table_headers = ["ID", "SCHEDULE", "IMAGE/SPACE", "COMMAND", "LAST RUN", "NEXT RUN", "SUSPEND"]
rows: list[list[Union[str, int]]] = []
filters: dict[str, str] = {}
for f in filter or []:
if "=" in f:
key, value = f.split("=", 1)
filters[key.lower()] = value
else:
print(f"Warning: Ignoring invalid filter format '{f}'. Use key=value format.")
for scheduled_job in scheduled_jobs:
suspend = scheduled_job.suspend or False
if not all and suspend:
continue
sj_id = scheduled_job.id
schedule = scheduled_job.schedule or "N/A"
image_or_space = scheduled_job.job_spec.docker_image or "N/A"
cmd = scheduled_job.job_spec.command or []
command_str = " ".join(cmd) if cmd else "N/A"
last_job_at = (
scheduled_job.status.last_job.at.strftime("%Y-%m-%d %H:%M:%S")
if scheduled_job.status.last_job
else "N/A"
)
next_job_run_at = (
scheduled_job.status.next_job_run_at.strftime("%Y-%m-%d %H:%M:%S")
if scheduled_job.status.next_job_run_at
else "N/A"
)
props = {"id": sj_id, "image": image_or_space, "suspend": str(suspend), "command": command_str}
if not _matches_filters(props, filters):
continue
rows.append([sj_id, schedule, image_or_space, command_str, last_job_at, next_job_run_at, suspend])
if not rows:
filters_msg = (
f" matching filters: {', '.join([f'{k}={v}' for k, v in filters.items()])}" if filters else ""
)
print(f"No scheduled jobs found{filters_msg}")
return
_print_output(rows, table_headers, format)
except HfHubHTTPError as e:
print(f"Error fetching scheduled jobs data: {e}")
except (KeyError, ValueError, TypeError) as e:
print(f"Error processing scheduled jobs data: {e}")
except Exception as e:
print(f"Unexpected error - {type(e).__name__}: {e}")
@scheduled_app.command("inspect", help="Display detailed information on one or more scheduled Jobs")
def scheduled_inspect(
scheduled_job_ids: Annotated[
list[str],
typer.Argument(
help="The scheduled jobs to inspect",
),
],
namespace: NamespaceOpt = None,
token: TokenOpt = None,
) -> None:
api = get_hf_api(token=token)
scheduled_jobs = [
api.inspect_scheduled_job(scheduled_job_id=scheduled_job_id, namespace=namespace)
for scheduled_job_id in scheduled_job_ids
]
print(json.dumps([asdict(scheduled_job) for scheduled_job in scheduled_jobs], indent=4, default=str))
@scheduled_app.command("delete", help="Delete a scheduled Job")
def scheduled_delete(
scheduled_job_id: ScheduledJobIdArg,
namespace: NamespaceOpt = None,
token: TokenOpt = None,
) -> None:
api = get_hf_api(token=token)
api.delete_scheduled_job(scheduled_job_id=scheduled_job_id, namespace=namespace)
@scheduled_app.command("suspend", help="Suspend (pause) a scheduled Job")
def scheduled_suspend(
scheduled_job_id: ScheduledJobIdArg,
namespace: NamespaceOpt = None,
token: TokenOpt = None,
) -> None:
api = get_hf_api(token=token)
api.suspend_scheduled_job(scheduled_job_id=scheduled_job_id, namespace=namespace)
@scheduled_app.command("resume", help="Resume (unpause) a scheduled Job")
def scheduled_resume(
scheduled_job_id: ScheduledJobIdArg,
namespace: NamespaceOpt = None,
token: TokenOpt = None,
) -> None:
api = get_hf_api(token=token)
api.resume_scheduled_job(scheduled_job_id=scheduled_job_id, namespace=namespace)
scheduled_uv_app = typer_factory(help="Schedule UV scripts on HF infrastructure")
scheduled_app.add_typer(scheduled_uv_app, name="uv")
@scheduled_uv_app.command("run", help="Schedule a UV script (local file or URL) on HF infrastructure")
def scheduled_uv_run(
schedule: ScheduleArg,
script: ScriptArg,
script_args: ScriptArgsArg = None,
suspend: SuspendOpt = None,
concurrency: ConcurrencyOpt = None,
image: ImageOpt = None,
repo: RepoOpt = None,
flavor: FlavorOpt = None,
env: EnvOpt = None,
secrets: SecretsOpt = None,
env_file: EnvFileOpt = None,
secrets_file: SecretsFileOpt = None,
timeout: TimeoutOpt = None,
namespace: NamespaceOpt = None,
token: TokenOpt = None,
with_: WithOpt = None,
python: PythonOpt = None,
) -> None:
env_map: dict[str, Optional[str]] = {}
if env_file:
env_map.update(load_dotenv(Path(env_file).read_text(), environ=os.environ.copy()))
for env_value in env or []:
env_map.update(load_dotenv(env_value, environ=os.environ.copy()))
secrets_map: dict[str, Optional[str]] = {}
extended_environ = _get_extended_environ()
if secrets_file:
secrets_map.update(load_dotenv(Path(secrets_file).read_text(), environ=extended_environ))
for secret in secrets or []:
secrets_map.update(load_dotenv(secret, environ=extended_environ))
api = get_hf_api(token=token)
job = api.create_scheduled_uv_job(
script=script,
script_args=script_args or [],
schedule=schedule,
suspend=suspend,
concurrency=concurrency,
dependencies=with_,
python=python,
image=image,
env=env_map,
secrets=secrets_map,
flavor=flavor, # type: ignore[arg-type]
timeout=timeout,
namespace=namespace,
_repo=repo,
)
print(f"Scheduled Job created with ID: {job.id}")
### UTILS
def _tabulate(rows: list[list[Union[str, int]]], headers: list[str]) -> str:
"""
Inspired by:
- stackoverflow.com/a/8356620/593036
- stackoverflow.com/questions/9535954/printing-lists-as-tabular-data
"""
col_widths = [max(len(str(x)) for x in col) for col in zip(*rows, headers)]
terminal_width = max(os.get_terminal_size().columns, len(headers) * 12)
while len(headers) + sum(col_widths) > terminal_width:
col_to_minimize = col_widths.index(max(col_widths))
col_widths[col_to_minimize] //= 2
if len(headers) + sum(col_widths) <= terminal_width:
col_widths[col_to_minimize] = terminal_width - sum(col_widths) - len(headers) + col_widths[col_to_minimize]
row_format = ("{{:{}}} " * len(headers)).format(*col_widths)
lines = []
lines.append(row_format.format(*headers))
lines.append(row_format.format(*["-" * w for w in col_widths]))
for row in rows:
row_format_args = [
str(x)[: col_width - 3] + "..." if len(str(x)) > col_width else str(x)
for x, col_width in zip(row, col_widths)
]
lines.append(row_format.format(*row_format_args))
return "\n".join(lines)
def _get_extended_environ() -> dict[str, str]:
extended_environ = os.environ.copy()
if (token := get_token()) is not None:
extended_environ["HF_TOKEN"] = token
return extended_environ

View File

@@ -0,0 +1,175 @@
"""
Implementation of a custom transfer agent for the transfer type "multipart" for
git-lfs.
Inspired by:
github.com/cbartz/git-lfs-swift-transfer-agent/blob/master/git_lfs_swift_transfer.py
Spec is: github.com/git-lfs/git-lfs/blob/master/docs/custom-transfers.md
To launch debugger while developing:
```
[lfs "customtransfer.multipart"]
path = /path/to/huggingface_hub/.env/bin/python
args = -m debugpy --listen 5678 --wait-for-client /path/to/huggingface_hub/src/huggingface_hub/commands/huggingface_cli.py lfs-multipart-upload
```
"""
import json
import os
import subprocess
import sys
from typing import Annotated, Optional
import typer
from huggingface_hub.lfs import LFS_MULTIPART_UPLOAD_COMMAND
from ..utils import get_session, hf_raise_for_status, logging
from ..utils._lfs import SliceFileObj
logger = logging.get_logger(__name__)
def lfs_enable_largefiles(
path: Annotated[
str,
typer.Argument(
help="Local path to repository you want to configure.",
),
],
) -> None:
"""
Configure a local git repository to use the multipart transfer agent for large files.
This command sets up git-lfs to use the custom multipart transfer agent
which enables efficient uploading of large files in chunks.
"""
local_path = os.path.abspath(path)
if not os.path.isdir(local_path):
print("This does not look like a valid git repo.")
raise typer.Exit(code=1)
subprocess.run(
"git config lfs.customtransfer.multipart.path hf".split(),
check=True,
cwd=local_path,
)
subprocess.run(
f"git config lfs.customtransfer.multipart.args {LFS_MULTIPART_UPLOAD_COMMAND}".split(),
check=True,
cwd=local_path,
)
print("Local repo set up for largefiles")
def write_msg(msg: dict):
"""Write out the message in Line delimited JSON."""
msg_str = json.dumps(msg) + "\n"
sys.stdout.write(msg_str)
sys.stdout.flush()
def read_msg() -> Optional[dict]:
"""Read Line delimited JSON from stdin."""
msg = json.loads(sys.stdin.readline().strip())
if "terminate" in (msg.get("type"), msg.get("event")):
# terminate message received
return None
if msg.get("event") not in ("download", "upload"):
logger.critical("Received unexpected message")
sys.exit(1)
return msg
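# Illustrative exchange over stdin/stdout (line-delimited JSON, per the git-lfs
# custom-transfer spec referenced in the module docstring):
#   stdin:  {"event": "init", "operation": "upload", ...}
#   stdout: {}
#   stdin:  {"event": "upload", "oid": "...", "path": "...", "action": {...}}
#   stdout: {"event": "progress", "oid": "...", "bytesSoFar": ..., "bytesSinceLast": ...}
#   stdout: {"event": "complete", "oid": "..."}
#   stdin:  {"event": "terminate"}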
def lfs_multipart_upload() -> None:
"""Internal git-lfs custom transfer agent for multipart uploads.
This function implements the custom transfer protocol for git-lfs multipart uploads.
Handles chunked uploads of large files to Hugging Face Hub.
"""
# Immediately after invoking a custom transfer process, git-lfs
# sends initiation data to the process over stdin.
# This tells the process useful information about the configuration.
init_msg = json.loads(sys.stdin.readline().strip())
if not (init_msg.get("event") == "init" and init_msg.get("operation") == "upload"):
write_msg({"error": {"code": 32, "message": "Wrong lfs init operation"}})
sys.exit(1)
# The transfer process should use the information it needs from the
# initiation structure, and also perform any one-off setup tasks it
# needs to do. It should then respond on stdout with a simple empty
# confirmation structure, as follows:
write_msg({})
# After the initiation exchange, git-lfs will send any number of
# transfer requests to the stdin of the transfer process, in a serial sequence.
while True:
msg = read_msg()
if msg is None:
# When all transfers have been processed, git-lfs will send
# a terminate event to the stdin of the transfer process.
# On receiving this message the transfer process should
# clean up and terminate. No response is expected.
sys.exit(0)
oid = msg["oid"]
filepath = msg["path"]
completion_url = msg["action"]["href"]
header = msg["action"]["header"]
chunk_size = int(header.pop("chunk_size"))
presigned_urls: list[str] = list(header.values())
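# Each presigned URL corresponds to one fixed-size chunk: part i covers bytes
# [i * chunk_size, (i + 1) * chunk_size) of the file being uploaded.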
# Send a "started" progress event to allow other workers to start.
# Otherwise they're delayed until first "progress" event is reported,
# i.e. after the first 5GB by default (!)
write_msg(
{
"event": "progress",
"oid": oid,
"bytesSoFar": 1,
"bytesSinceLast": 0,
}
)
parts = []
with open(filepath, "rb") as file:
for i, presigned_url in enumerate(presigned_urls):
with SliceFileObj(
file,
seek_from=i * chunk_size,
read_limit=chunk_size,
) as data:
r = get_session().put(presigned_url, data=data)
hf_raise_for_status(r)
parts.append(
{
"etag": r.headers.get("etag"),
"partNumber": i + 1,
}
)
# In order to support progress reporting while data is uploading / downloading,
# the transfer process should post messages to stdout
write_msg(
{
"event": "progress",
"oid": oid,
"bytesSoFar": (i + 1) * chunk_size,
"bytesSinceLast": chunk_size,
}
)
r = get_session().post(
completion_url,
json={
"oid": oid,
"parts": parts,
},
)
hf_raise_for_status(r)
write_msg({"event": "complete", "oid": oid})

View File

@@ -0,0 +1,315 @@
# Copyright 2025 The HuggingFace Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Contains commands to interact with repositories on the Hugging Face Hub.
Usage:
# create a new dataset repo on the Hub
hf repo create my-cool-dataset --repo-type=dataset
# create a private model repo on the Hub
hf repo create my-cool-model --private
"""
import enum
from typing import Annotated, Optional
import typer
from huggingface_hub.errors import HfHubHTTPError, RepositoryNotFoundError, RevisionNotFoundError
from huggingface_hub.utils import ANSI, logging
from ._cli_utils import (
PrivateOpt,
RepoIdArg,
RepoType,
RepoTypeOpt,
RevisionOpt,
TokenOpt,
get_hf_api,
typer_factory,
)
logger = logging.get_logger(__name__)
repo_cli = typer_factory(help="Manage repos on the Hub.")
tag_cli = typer_factory(help="Manage tags for a repo on the Hub.")
branch_cli = typer_factory(help="Manage branches for a repo on the Hub.")
repo_cli.add_typer(tag_cli, name="tag")
repo_cli.add_typer(branch_cli, name="branch")
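# Example usage (illustrative; repo and ref names are placeholders):
#   hf repo tag create my-cool-model v1.0 -m "First release"
#   hf repo branch create my-cool-model dev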
class GatedChoices(str, enum.Enum):
auto = "auto"
manual = "manual"
false = "false"
@repo_cli.command("create", help="Create a new repo on the Hub.")
def repo_create(
repo_id: RepoIdArg,
repo_type: RepoTypeOpt = RepoType.model,
space_sdk: Annotated[
Optional[str],
typer.Option(
help="Hugging Face Spaces SDK type. Required when --type is set to 'space'.",
),
] = None,
private: PrivateOpt = False,
token: TokenOpt = None,
exist_ok: Annotated[
bool,
typer.Option(
help="Do not raise an error if repo already exists.",
),
] = False,
resource_group_id: Annotated[
Optional[str],
typer.Option(
help="Resource group in which to create the repo. Resource groups is only available for Enterprise Hub organizations.",
),
] = None,
) -> None:
api = get_hf_api(token=token)
repo_url = api.create_repo(
repo_id=repo_id,
repo_type=repo_type.value,
private=private,
token=token,
exist_ok=exist_ok,
resource_group_id=resource_group_id,
space_sdk=space_sdk,
)
print(f"Successfully created {ANSI.bold(repo_url.repo_id)} on the Hub.")
print(f"Your repo is now available at {ANSI.bold(repo_url)}")
@repo_cli.command("delete", help="Delete a repo from the Hub. this is an irreversible operation.")
def repo_delete(
repo_id: RepoIdArg,
repo_type: RepoTypeOpt = RepoType.model,
token: TokenOpt = None,
missing_ok: Annotated[
bool,
typer.Option(
help="If set to True, do not raise an error if repo does not exist.",
),
] = False,
) -> None:
api = get_hf_api(token=token)
api.delete_repo(
repo_id=repo_id,
repo_type=repo_type.value,
missing_ok=missing_ok,
)
print(f"Successfully deleted {ANSI.bold(repo_id)} on the Hub.")
@repo_cli.command("move", help="Move a repository from a namespace to another namespace.")
def repo_move(
from_id: RepoIdArg,
to_id: RepoIdArg,
token: TokenOpt = None,
repo_type: RepoTypeOpt = RepoType.model,
) -> None:
api = get_hf_api(token=token)
api.move_repo(
from_id=from_id,
to_id=to_id,
repo_type=repo_type.value,
)
print(f"Successfully moved {ANSI.bold(from_id)} to {ANSI.bold(to_id)} on the Hub.")
@repo_cli.command("settings", help="Update the settings of a repository.")
def repo_settings(
repo_id: RepoIdArg,
gated: Annotated[
Optional[GatedChoices],
typer.Option(
help="The gated status for the repository.",
),
] = None,
private: Annotated[
Optional[bool],
typer.Option(
help="Whether the repository should be private.",
),
] = None,
token: TokenOpt = None,
repo_type: RepoTypeOpt = RepoType.model,
) -> None:
api = get_hf_api(token=token)
api.update_repo_settings(
repo_id=repo_id,
gated=(gated.value if gated else None), # type: ignore [arg-type]
private=private,
repo_type=repo_type.value,
)
print(f"Successfully updated the settings of {ANSI.bold(repo_id)} on the Hub.")
@branch_cli.command("create", help="Create a new branch for a repo on the Hub.")
def branch_create(
repo_id: RepoIdArg,
branch: Annotated[
str,
typer.Argument(
help="The name of the branch to create.",
),
],
revision: RevisionOpt = None,
token: TokenOpt = None,
repo_type: RepoTypeOpt = RepoType.model,
exist_ok: Annotated[
bool,
typer.Option(
help="If set to True, do not raise an error if branch already exists.",
),
] = False,
) -> None:
api = get_hf_api(token=token)
api.create_branch(
repo_id=repo_id,
branch=branch,
revision=revision,
repo_type=repo_type.value,
exist_ok=exist_ok,
)
print(f"Successfully created {ANSI.bold(branch)} branch on {repo_type.value} {ANSI.bold(repo_id)}")
@branch_cli.command("delete", help="Delete a branch from a repo on the Hub.")
def branch_delete(
repo_id: RepoIdArg,
branch: Annotated[
str,
typer.Argument(
help="The name of the branch to delete.",
),
],
token: TokenOpt = None,
repo_type: RepoTypeOpt = RepoType.model,
) -> None:
api = get_hf_api(token=token)
api.delete_branch(
repo_id=repo_id,
branch=branch,
repo_type=repo_type.value,
)
print(f"Successfully deleted {ANSI.bold(branch)} branch on {repo_type.value} {ANSI.bold(repo_id)}")
@tag_cli.command("create", help="Create a tag for a repo.")
def tag_create(
repo_id: RepoIdArg,
tag: Annotated[
str,
typer.Argument(
help="The name of the tag to create.",
),
],
message: Annotated[
Optional[str],
typer.Option(
"-m",
"--message",
help="The description of the tag to create.",
),
] = None,
revision: RevisionOpt = None,
token: TokenOpt = None,
repo_type: RepoTypeOpt = RepoType.model,
) -> None:
repo_type_str = repo_type.value
api = get_hf_api(token=token)
print(f"You are about to create tag {ANSI.bold(tag)} on {repo_type_str} {ANSI.bold(repo_id)}")
try:
api.create_tag(repo_id=repo_id, tag=tag, tag_message=message, revision=revision, repo_type=repo_type_str)
except RepositoryNotFoundError:
print(f"{repo_type_str.capitalize()} {ANSI.bold(repo_id)} not found.")
raise typer.Exit(code=1)
except RevisionNotFoundError:
print(f"Revision {ANSI.bold(str(revision))} not found.")
raise typer.Exit(code=1)
except HfHubHTTPError as e:
if e.response.status_code == 409:
print(f"Tag {ANSI.bold(tag)} already exists on {ANSI.bold(repo_id)}")
raise typer.Exit(code=1)
raise e
print(f"Tag {ANSI.bold(tag)} created on {ANSI.bold(repo_id)}")
@tag_cli.command("list", help="List tags for a repo.")
def tag_list(
repo_id: RepoIdArg,
token: TokenOpt = None,
repo_type: RepoTypeOpt = RepoType.model,
) -> None:
repo_type_str = repo_type.value
api = get_hf_api(token=token)
try:
refs = api.list_repo_refs(repo_id=repo_id, repo_type=repo_type_str)
except RepositoryNotFoundError:
print(f"{repo_type_str.capitalize()} {ANSI.bold(repo_id)} not found.")
raise typer.Exit(code=1)
except HfHubHTTPError as e:
print(e)
print(ANSI.red(e.response.text))
raise typer.Exit(code=1)
if len(refs.tags) == 0:
print("No tags found")
raise typer.Exit(code=0)
print(f"Tags for {repo_type_str} {ANSI.bold(repo_id)}:")
for t in refs.tags:
print(t.name)
@tag_cli.command("delete", help="Delete a tag for a repo.")
def tag_delete(
repo_id: RepoIdArg,
tag: Annotated[
str,
typer.Argument(
help="The name of the tag to delete.",
),
],
yes: Annotated[
bool,
typer.Option(
"-y",
"--yes",
help="Answer Yes to prompt automatically",
),
] = False,
token: TokenOpt = None,
repo_type: RepoTypeOpt = RepoType.model,
) -> None:
repo_type_str = repo_type.value
print(f"You are about to delete tag {ANSI.bold(tag)} on {repo_type_str} {ANSI.bold(repo_id)}")
if not yes:
choice = input("Proceed? [Y/n] ").lower()
if choice not in ("", "y", "yes"):
print("Abort")
raise typer.Exit()
api = get_hf_api(token=token)
try:
api.delete_tag(repo_id=repo_id, tag=tag, repo_type=repo_type_str)
except RepositoryNotFoundError:
print(f"{repo_type_str.capitalize()} {ANSI.bold(repo_id)} not found.")
raise typer.Exit(code=1)
except RevisionNotFoundError:
print(f"Tag {ANSI.bold(tag)} not found on {ANSI.bold(repo_id)}")
raise typer.Exit(code=1)
print(f"Tag {ANSI.bold(tag)} deleted on {ANSI.bold(repo_id)}")

View File

@@ -0,0 +1,94 @@
# coding=utf-8
# Copyright 2023-present, the HuggingFace Inc. team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Contains command to update or delete files in a repository using the CLI.
Usage:
# delete all
hf repo-files delete <repo_id> "*"
# delete single file
hf repo-files delete <repo_id> file.txt
# delete single folder
hf repo-files delete <repo_id> folder/
# delete multiple
hf repo-files delete <repo_id> file.txt folder/ file2.txt
# delete multiple patterns
hf repo-files delete <repo_id> file.txt "*.json" "folder/*.parquet"
# delete from different revision / repo-type
hf repo-files delete <repo_id> file.txt --revision=refs/pr/1 --repo-type=dataset
"""
from typing import Annotated, Optional
import typer
from huggingface_hub import logging
from ._cli_utils import RepoIdArg, RepoType, RepoTypeOpt, RevisionOpt, TokenOpt, get_hf_api, typer_factory
logger = logging.get_logger(__name__)
repo_files_cli = typer_factory(help="Manage files in a repo on the Hub.")
@repo_files_cli.command("delete")
def repo_files_delete(
repo_id: RepoIdArg,
patterns: Annotated[
list[str],
typer.Argument(
help="Glob patterns to match files to delete.",
),
],
repo_type: RepoTypeOpt = RepoType.model,
revision: RevisionOpt = None,
commit_message: Annotated[
Optional[str],
typer.Option(
help="The summary / title / first line of the generated commit.",
),
] = None,
commit_description: Annotated[
Optional[str],
typer.Option(
help="The description of the generated commit.",
),
] = None,
create_pr: Annotated[
bool,
typer.Option(
help="Whether to create a new Pull Request for these changes.",
),
] = False,
token: TokenOpt = None,
) -> None:
api = get_hf_api(token=token)
url = api.delete_files(
delete_patterns=patterns,
repo_id=repo_id,
repo_type=repo_type.value,
revision=revision,
commit_message=commit_message,
commit_description=commit_description,
create_pr=create_pr,
)
print(f"Files correctly deleted from repo. Commit: {url}.")
logging.set_verbosity_warning()

View File

@@ -0,0 +1,33 @@
# Copyright 2022 The HuggingFace Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Contains commands to print information about the environment and version.
Usage:
hf env
hf version
"""
from huggingface_hub import __version__
from ..utils import dump_environment_info
def env() -> None:
"""Print information about the environment."""
dump_environment_info()
def version() -> None:
"""Print CLI version."""
print(__version__)

View File

@@ -0,0 +1,294 @@
# coding=utf-8
# Copyright 2023-present, the HuggingFace Inc. team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Contains command to upload a repo or file with the CLI.
Usage:
# Upload file (implicit)
hf upload my-cool-model ./my-cool-model.safetensors
# Upload file (explicit)
hf upload my-cool-model ./my-cool-model.safetensors model.safetensors
# Upload directory (implicit). If `my-cool-model/` is a directory it will be uploaded, otherwise an exception is raised.
hf upload my-cool-model
# Upload directory (explicit)
hf upload my-cool-model ./models/my-cool-model .
# Upload filtered directory (example: tensorboard logs except for the last run)
hf upload my-cool-model ./model/training/logs --include "*.tfevents.*" --exclude "*20230905*"
# Upload with wildcard
hf upload my-cool-model "./model/training/*.safetensors"
# Upload private dataset
hf upload Wauplin/my-cool-dataset ./data . --repo-type=dataset --private
# Upload with token
hf upload Wauplin/my-cool-model --token=hf_****
# Sync local Space with Hub (upload new files, delete removed files)
hf upload Wauplin/space-example --repo-type=space --exclude="/logs/*" --delete="*" --commit-message="Sync local Space with Hub"
# Schedule commits every 30 minutes
hf upload Wauplin/my-cool-model --every=30
"""
import os
import time
import warnings
from typing import Annotated, Optional
import typer
from huggingface_hub import logging
from huggingface_hub._commit_scheduler import CommitScheduler
from huggingface_hub.errors import RevisionNotFoundError
from huggingface_hub.utils import disable_progress_bars, enable_progress_bars
from ._cli_utils import PrivateOpt, RepoIdArg, RepoType, RepoTypeOpt, RevisionOpt, TokenOpt, get_hf_api
logger = logging.get_logger(__name__)
def upload(
repo_id: RepoIdArg,
local_path: Annotated[
Optional[str],
typer.Argument(
help="Local path to the file or folder to upload. Wildcard patterns are supported. Defaults to current directory.",
),
] = None,
path_in_repo: Annotated[
Optional[str],
typer.Argument(
help="Path of the file or folder in the repo. Defaults to the relative path of the file or folder.",
),
] = None,
repo_type: RepoTypeOpt = RepoType.model,
revision: RevisionOpt = None,
private: PrivateOpt = False,
include: Annotated[
Optional[list[str]],
typer.Option(
help="Glob patterns to match files to upload.",
),
] = None,
exclude: Annotated[
Optional[list[str]],
typer.Option(
help="Glob patterns to exclude from files to upload.",
),
] = None,
delete: Annotated[
Optional[list[str]],
typer.Option(
help="Glob patterns for file to be deleted from the repo while committing.",
),
] = None,
commit_message: Annotated[
Optional[str],
typer.Option(
help="The summary / title / first line of the generated commit.",
),
] = None,
commit_description: Annotated[
Optional[str],
typer.Option(
help="The description of the generated commit.",
),
] = None,
create_pr: Annotated[
bool,
typer.Option(
help="Whether to upload content as a new Pull Request.",
),
] = False,
every: Annotated[
Optional[float],
typer.Option(
help="f set, a background job is scheduled to create commits every `every` minutes.",
),
] = None,
token: TokenOpt = None,
quiet: Annotated[
bool,
typer.Option(
help="Disable progress bars and warnings; print only the returned path.",
),
] = False,
) -> None:
"""Upload a file or a folder to the Hub. Recommended for single-commit uploads."""
if every is not None and every <= 0:
raise typer.BadParameter("--every must be a positive value", param_hint="every")
repo_type_str = repo_type.value
api = get_hf_api(token=token)
# Resolve local_path and path_in_repo based on implicit/explicit rules
resolved_local_path, resolved_path_in_repo, resolved_include = _resolve_upload_paths(
repo_id=repo_id, local_path=local_path, path_in_repo=path_in_repo, include=include
)
def run_upload() -> str:
if os.path.isfile(resolved_local_path):
if resolved_include:
warnings.warn("Ignoring --include since a single file is uploaded.")
if exclude:
warnings.warn("Ignoring --exclude since a single file is uploaded.")
if delete:
warnings.warn("Ignoring --delete since a single file is uploaded.")
# Schedule commits if `every` is set
if every is not None:
if os.path.isfile(resolved_local_path):
# If file => watch entire folder + use allow_patterns
folder_path = os.path.dirname(resolved_local_path)
pi = (
resolved_path_in_repo[: -len(resolved_local_path)]
if resolved_path_in_repo.endswith(resolved_local_path)
else resolved_path_in_repo
)
allow_patterns = [resolved_local_path]
ignore_patterns: Optional[list[str]] = []
else:
folder_path = resolved_local_path
pi = resolved_path_in_repo
allow_patterns = resolved_include or []
ignore_patterns = exclude or []
if delete:
warnings.warn("Ignoring --delete when uploading with scheduled commits.")
scheduler = CommitScheduler(
folder_path=folder_path,
repo_id=repo_id,
repo_type=repo_type_str,
revision=revision,
allow_patterns=allow_patterns,
ignore_patterns=ignore_patterns,
path_in_repo=pi,
private=private,
every=every,
hf_api=api,
)
print(f"Scheduling commits every {every} minutes to {scheduler.repo_id}.")
try:
while True:
time.sleep(100)
except KeyboardInterrupt:
scheduler.stop()
return "Stopped scheduled commits."
# Otherwise, create repo and proceed with the upload
if not os.path.isfile(resolved_local_path) and not os.path.isdir(resolved_local_path):
raise FileNotFoundError(f"No such file or directory: '{resolved_local_path}'.")
created = api.create_repo(
repo_id=repo_id,
repo_type=repo_type_str,
exist_ok=True,
private=private,
space_sdk="gradio" if repo_type_str == "space" else None,
# ^ We don't want it to fail when uploading to a Space => let's set Gradio by default.
# ^ I'd rather not add CLI args to set it explicitly as we already have `hf repo create` for that.
).repo_id
# Check if branch already exists and if not, create it
if revision is not None and not create_pr:
try:
api.repo_info(repo_id=created, repo_type=repo_type_str, revision=revision)
except RevisionNotFoundError:
logger.info(f"Branch '{revision}' not found. Creating it...")
api.create_branch(repo_id=created, repo_type=repo_type_str, branch=revision, exist_ok=True)
# ^ `exist_ok=True` to avoid race concurrency issues
# File-based upload
if os.path.isfile(resolved_local_path):
return api.upload_file(
path_or_fileobj=resolved_local_path,
path_in_repo=resolved_path_in_repo,
repo_id=created,
repo_type=repo_type_str,
revision=revision,
commit_message=commit_message,
commit_description=commit_description,
create_pr=create_pr,
)
# Folder-based upload
return api.upload_folder(
folder_path=resolved_local_path,
path_in_repo=resolved_path_in_repo,
repo_id=created,
repo_type=repo_type_str,
revision=revision,
commit_message=commit_message,
commit_description=commit_description,
create_pr=create_pr,
allow_patterns=resolved_include,
ignore_patterns=exclude,
delete_patterns=delete,
)
if quiet:
disable_progress_bars()
with warnings.catch_warnings():
warnings.simplefilter("ignore")
print(run_upload())
enable_progress_bars()
else:
print(run_upload())
logging.set_verbosity_warning()
def _resolve_upload_paths(
*, repo_id: str, local_path: Optional[str], path_in_repo: Optional[str], include: Optional[list[str]]
) -> tuple[str, str, Optional[list[str]]]:
repo_name = repo_id.split("/")[-1]
resolved_include = include
if local_path is not None and any(c in local_path for c in ["*", "?", "["]):
if include is not None:
raise ValueError("Cannot set --include when local_path contains a wildcard.")
if path_in_repo is not None and path_in_repo != ".":
raise ValueError("Cannot set path_in_repo when local_path contains a wildcard.")
return ".", local_path, ["."] # will be adjusted below; placeholder for type
if local_path is None and os.path.isfile(repo_name):
return repo_name, repo_name, resolved_include
if local_path is None and os.path.isdir(repo_name):
return repo_name, ".", resolved_include
if local_path is None:
raise ValueError(f"'{repo_name}' is not a local file or folder. Please set local_path explicitly.")
if path_in_repo is None and os.path.isfile(local_path):
return local_path, os.path.basename(local_path), resolved_include
if path_in_repo is None:
return local_path, ".", resolved_include
return local_path, path_in_repo, resolved_include
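# Illustrative resolution outcomes (assuming the local paths exist):
#   local_path="weights.bin", path_in_repo=None       -> ("weights.bin", "weights.bin", include)
#   local_path="./ckpt" (a folder), path_in_repo=None -> ("./ckpt", ".", include)
#   local_path="./ckpt/*.safetensors"                 -> (".", ".", ["./ckpt/*.safetensors"])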

View File

@@ -0,0 +1,117 @@
# coding=utf-8
# Copyright 2023-present, the HuggingFace Inc. team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Contains command to upload a large folder with the CLI."""
import os
from typing import Annotated, Optional
import typer
from huggingface_hub import logging
from huggingface_hub.utils import ANSI, disable_progress_bars
from ._cli_utils import PrivateOpt, RepoIdArg, RepoType, RepoTypeOpt, RevisionOpt, TokenOpt, get_hf_api
logger = logging.get_logger(__name__)
def upload_large_folder(
repo_id: RepoIdArg,
local_path: Annotated[
str,
typer.Argument(
help="Local path to the folder to upload.",
),
],
repo_type: RepoTypeOpt = RepoType.model,
revision: RevisionOpt = None,
private: PrivateOpt = False,
include: Annotated[
Optional[list[str]],
typer.Option(
help="Glob patterns to match files to upload.",
),
] = None,
exclude: Annotated[
Optional[list[str]],
typer.Option(
help="Glob patterns to exclude from files to upload.",
),
] = None,
token: TokenOpt = None,
num_workers: Annotated[
Optional[int],
typer.Option(
help="Number of workers to use to hash, upload and commit files.",
),
] = None,
no_report: Annotated[
bool,
typer.Option(
help="Whether to disable regular status report.",
),
] = False,
no_bars: Annotated[
bool,
typer.Option(
help="Whether to disable progress bars.",
),
] = False,
) -> None:
"""Upload a large folder to the Hub. Recommended for resumable uploads."""
if not os.path.isdir(local_path):
raise typer.BadParameter("Large upload is only supported for folders.", param_hint="local_path")
print(
ANSI.yellow(
"You are about to upload a large folder to the Hub using `hf upload-large-folder`. "
"This is a new feature so feedback is very welcome!\n"
"\n"
"A few things to keep in mind:\n"
" - Repository limits still apply: https://huggingface.co/docs/hub/repositories-recommendations\n"
" - Do not start several processes in parallel.\n"
" - You can interrupt and resume the process at any time. "
"The script will pick up where it left off except for partially uploaded files that would have to be entirely reuploaded.\n"
" - Do not upload the same folder to several repositories. If you need to do so, you must delete the `./.cache/huggingface/` folder first.\n"
"\n"
f"Some temporary metadata will be stored under `{local_path}/.cache/huggingface`.\n"
" - You must not modify those files manually.\n"
" - You must not delete the `./.cache/huggingface/` folder while a process is running.\n"
" - You can delete the `./.cache/huggingface/` folder to reinitialize the upload state when process is not running. Files will have to be hashed and preuploaded again, except for already committed files.\n"
"\n"
"If the process output is too verbose, you can disable the progress bars with `--no-bars`. "
"You can also entirely disable the status report with `--no-report`.\n"
"\n"
"For more details, run `hf upload-large-folder --help` or check the documentation at "
"https://huggingface.co/docs/huggingface_hub/guides/upload#upload-a-large-folder."
)
)
if no_bars:
disable_progress_bars()
api = get_hf_api(token=token)
api.upload_large_folder(
repo_id=repo_id,
folder_path=local_path,
repo_type=repo_type.value,
revision=revision,
private=private,
allow_patterns=include,
ignore_patterns=exclude,
num_workers=num_workers,
print_report=not no_report,
)