chore: 添加虚拟环境到仓库
- 添加 backend_service/venv 虚拟环境 - 包含所有Python依赖包 - 注意:虚拟环境约393MB,包含12655个文件
This commit is contained in:
@@ -0,0 +1,333 @@
|
||||
import base64
|
||||
import re
|
||||
|
||||
import requests
|
||||
|
||||
from ..spec import AbstractFileSystem
|
||||
from ..utils import infer_storage_options
|
||||
from .memory import MemoryFile
|
||||
|
||||
|
||||
class GithubFileSystem(AbstractFileSystem):
|
||||
"""Interface to files in github
|
||||
|
||||
An instance of this class provides the files residing within a remote github
|
||||
repository. You may specify a point in the repos history, by SHA, branch
|
||||
or tag (default is current master).
|
||||
|
||||
For files less than 1 MB in size, file content is returned directly in a
|
||||
MemoryFile. For larger files, or for files tracked by git-lfs, file content
|
||||
is returned as an HTTPFile wrapping the ``download_url`` provided by the
|
||||
GitHub API.
|
||||
|
||||
When using fsspec.open, allows URIs of the form:
|
||||
|
||||
- "github://path/file", in which case you must specify org, repo and
|
||||
may specify sha in the extra args
|
||||
- 'github://org:repo@/precip/catalog.yml', where the org and repo are
|
||||
part of the URI
|
||||
- 'github://org:repo@sha/precip/catalog.yml', where the sha is also included
|
||||
|
||||
``sha`` can be the full or abbreviated hex of the commit you want to fetch
|
||||
from, or a branch or tag name (so long as it doesn't contain special characters
|
||||
like "/", "?", which would have to be HTTP-encoded).
|
||||
|
||||
For authorised access, you must provide username and token, which can be made
|
||||
at https://github.com/settings/tokens
|
||||
"""
|
||||
|
||||
url = "https://api.github.com/repos/{org}/{repo}/git/trees/{sha}"
|
||||
content_url = "https://api.github.com/repos/{org}/{repo}/contents/{path}?ref={sha}"
|
||||
protocol = "github"
|
||||
timeout = (60, 60) # connect, read timeouts
|
||||
|
||||
def __init__(
|
||||
self, org, repo, sha=None, username=None, token=None, timeout=None, **kwargs
|
||||
):
|
||||
super().__init__(**kwargs)
|
||||
self.org = org
|
||||
self.repo = repo
|
||||
if (username is None) ^ (token is None):
|
||||
raise ValueError("Auth required both username and token")
|
||||
self.username = username
|
||||
self.token = token
|
||||
if timeout is not None:
|
||||
self.timeout = timeout
|
||||
if sha is None:
|
||||
# look up default branch (not necessarily "master")
|
||||
u = "https://api.github.com/repos/{org}/{repo}"
|
||||
r = requests.get(
|
||||
u.format(org=org, repo=repo), timeout=self.timeout, **self.kw
|
||||
)
|
||||
r.raise_for_status()
|
||||
sha = r.json()["default_branch"]
|
||||
|
||||
self.root = sha
|
||||
self.ls("")
|
||||
try:
|
||||
from .http import HTTPFileSystem
|
||||
|
||||
self.http_fs = HTTPFileSystem(**kwargs)
|
||||
except ImportError:
|
||||
self.http_fs = None
|
||||
|
||||
@property
|
||||
def kw(self):
|
||||
if self.username:
|
||||
return {"auth": (self.username, self.token)}
|
||||
return {}
|
||||
|
||||
@classmethod
|
||||
def repos(cls, org_or_user, is_org=True):
|
||||
"""List repo names for given org or user
|
||||
|
||||
This may become the top level of the FS
|
||||
|
||||
Parameters
|
||||
----------
|
||||
org_or_user: str
|
||||
Name of the github org or user to query
|
||||
is_org: bool (default True)
|
||||
Whether the name is an organisation (True) or user (False)
|
||||
|
||||
Returns
|
||||
-------
|
||||
List of string
|
||||
"""
|
||||
r = requests.get(
|
||||
f"https://api.github.com/{['users', 'orgs'][is_org]}/{org_or_user}/repos",
|
||||
timeout=cls.timeout,
|
||||
)
|
||||
r.raise_for_status()
|
||||
return [repo["name"] for repo in r.json()]
|
||||
|
||||
@property
|
||||
def tags(self):
|
||||
"""Names of tags in the repo"""
|
||||
r = requests.get(
|
||||
f"https://api.github.com/repos/{self.org}/{self.repo}/tags",
|
||||
timeout=self.timeout,
|
||||
**self.kw,
|
||||
)
|
||||
r.raise_for_status()
|
||||
return [t["name"] for t in r.json()]
|
||||
|
||||
@property
|
||||
def branches(self):
|
||||
"""Names of branches in the repo"""
|
||||
r = requests.get(
|
||||
f"https://api.github.com/repos/{self.org}/{self.repo}/branches",
|
||||
timeout=self.timeout,
|
||||
**self.kw,
|
||||
)
|
||||
r.raise_for_status()
|
||||
return [t["name"] for t in r.json()]
|
||||
|
||||
@property
|
||||
def refs(self):
|
||||
"""Named references, tags and branches"""
|
||||
return {"tags": self.tags, "branches": self.branches}
|
||||
|
||||
def ls(self, path, detail=False, sha=None, _sha=None, **kwargs):
|
||||
"""List files at given path
|
||||
|
||||
Parameters
|
||||
----------
|
||||
path: str
|
||||
Location to list, relative to repo root
|
||||
detail: bool
|
||||
If True, returns list of dicts, one per file; if False, returns
|
||||
list of full filenames only
|
||||
sha: str (optional)
|
||||
List at the given point in the repo history, branch or tag name or commit
|
||||
SHA
|
||||
_sha: str (optional)
|
||||
List this specific tree object (used internally to descend into trees)
|
||||
"""
|
||||
path = self._strip_protocol(path)
|
||||
if path == "":
|
||||
_sha = sha or self.root
|
||||
if _sha is None:
|
||||
parts = path.rstrip("/").split("/")
|
||||
so_far = ""
|
||||
_sha = sha or self.root
|
||||
for part in parts:
|
||||
out = self.ls(so_far, True, sha=sha, _sha=_sha)
|
||||
so_far += "/" + part if so_far else part
|
||||
out = [o for o in out if o["name"] == so_far]
|
||||
if not out:
|
||||
raise FileNotFoundError(path)
|
||||
out = out[0]
|
||||
if out["type"] == "file":
|
||||
if detail:
|
||||
return [out]
|
||||
else:
|
||||
return path
|
||||
_sha = out["sha"]
|
||||
if path not in self.dircache or sha not in [self.root, None]:
|
||||
r = requests.get(
|
||||
self.url.format(org=self.org, repo=self.repo, sha=_sha),
|
||||
timeout=self.timeout,
|
||||
**self.kw,
|
||||
)
|
||||
if r.status_code == 404:
|
||||
raise FileNotFoundError(path)
|
||||
r.raise_for_status()
|
||||
types = {"blob": "file", "tree": "directory"}
|
||||
out = [
|
||||
{
|
||||
"name": path + "/" + f["path"] if path else f["path"],
|
||||
"mode": f["mode"],
|
||||
"type": types[f["type"]],
|
||||
"size": f.get("size", 0),
|
||||
"sha": f["sha"],
|
||||
}
|
||||
for f in r.json()["tree"]
|
||||
if f["type"] in types
|
||||
]
|
||||
if sha in [self.root, None]:
|
||||
self.dircache[path] = out
|
||||
else:
|
||||
out = self.dircache[path]
|
||||
if detail:
|
||||
return out
|
||||
else:
|
||||
return sorted([f["name"] for f in out])
|
||||
|
||||
def invalidate_cache(self, path=None):
|
||||
self.dircache.clear()
|
||||
|
||||
@classmethod
|
||||
def _strip_protocol(cls, path):
|
||||
opts = infer_storage_options(path)
|
||||
if "username" not in opts:
|
||||
return super()._strip_protocol(path)
|
||||
return opts["path"].lstrip("/")
|
||||
|
||||
@staticmethod
|
||||
def _get_kwargs_from_urls(path):
|
||||
opts = infer_storage_options(path)
|
||||
if "username" not in opts:
|
||||
return {}
|
||||
out = {"org": opts["username"], "repo": opts["password"]}
|
||||
if opts["host"]:
|
||||
out["sha"] = opts["host"]
|
||||
return out
|
||||
|
||||
def _open(
|
||||
self,
|
||||
path,
|
||||
mode="rb",
|
||||
block_size=None,
|
||||
cache_options=None,
|
||||
sha=None,
|
||||
**kwargs,
|
||||
):
|
||||
if mode != "rb":
|
||||
raise NotImplementedError
|
||||
|
||||
# construct a url to hit the GitHub API's repo contents API
|
||||
url = self.content_url.format(
|
||||
org=self.org, repo=self.repo, path=path, sha=sha or self.root
|
||||
)
|
||||
|
||||
# make a request to this API, and parse the response as JSON
|
||||
r = requests.get(url, timeout=self.timeout, **self.kw)
|
||||
if r.status_code == 404:
|
||||
raise FileNotFoundError(path)
|
||||
r.raise_for_status()
|
||||
content_json = r.json()
|
||||
|
||||
# if the response's content key is not empty, try to parse it as base64
|
||||
if content_json["content"]:
|
||||
content = base64.b64decode(content_json["content"])
|
||||
|
||||
# as long as the content does not start with the string
|
||||
# "version https://git-lfs.github.com/"
|
||||
# then it is probably not a git-lfs pointer and we can just return
|
||||
# the content directly
|
||||
if not content.startswith(b"version https://git-lfs.github.com/"):
|
||||
return MemoryFile(None, None, content)
|
||||
|
||||
# we land here if the content was not present in the first response
|
||||
# (regular file over 1MB or git-lfs tracked file)
|
||||
# in this case, we get let the HTTPFileSystem handle the download
|
||||
if self.http_fs is None:
|
||||
raise ImportError(
|
||||
"Please install fsspec[http] to access github files >1 MB "
|
||||
"or git-lfs tracked files."
|
||||
)
|
||||
return self.http_fs.open(
|
||||
content_json["download_url"],
|
||||
mode=mode,
|
||||
block_size=block_size,
|
||||
cache_options=cache_options,
|
||||
**kwargs,
|
||||
)
|
||||
|
||||
def rm(self, path, recursive=False, maxdepth=None, message=None):
|
||||
path = self.expand_path(path, recursive=recursive, maxdepth=maxdepth)
|
||||
for p in reversed(path):
|
||||
self.rm_file(p, message=message)
|
||||
|
||||
def rm_file(self, path, message=None, **kwargs):
|
||||
"""
|
||||
Remove a file from a specified branch using a given commit message.
|
||||
|
||||
Since Github DELETE operation requires a branch name, and we can't reliably
|
||||
determine whether the provided SHA refers to a branch, tag, or commit, we
|
||||
assume it's a branch. If it's not, the user will encounter an error when
|
||||
attempting to retrieve the file SHA or delete the file.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
path: str
|
||||
The file's location relative to the repository root.
|
||||
message: str, optional
|
||||
The commit message for the deletion.
|
||||
"""
|
||||
|
||||
if not self.username:
|
||||
raise ValueError("Authentication required")
|
||||
|
||||
path = self._strip_protocol(path)
|
||||
|
||||
# Attempt to get SHA from cache or Github API
|
||||
sha = self._get_sha_from_cache(path)
|
||||
if not sha:
|
||||
url = self.content_url.format(
|
||||
org=self.org, repo=self.repo, path=path.lstrip("/"), sha=self.root
|
||||
)
|
||||
r = requests.get(url, timeout=self.timeout, **self.kw)
|
||||
if r.status_code == 404:
|
||||
raise FileNotFoundError(path)
|
||||
r.raise_for_status()
|
||||
sha = r.json()["sha"]
|
||||
|
||||
# Delete the file
|
||||
delete_url = self.content_url.format(
|
||||
org=self.org, repo=self.repo, path=path, sha=self.root
|
||||
)
|
||||
branch = self.root
|
||||
data = {
|
||||
"message": message or f"Delete {path}",
|
||||
"sha": sha,
|
||||
**({"branch": branch} if branch else {}),
|
||||
}
|
||||
|
||||
r = requests.delete(delete_url, json=data, timeout=self.timeout, **self.kw)
|
||||
error_message = r.json().get("message", "")
|
||||
if re.search(r"Branch .+ not found", error_message):
|
||||
error = "Remove only works when the filesystem is initialised from a branch or default (None)"
|
||||
raise ValueError(error)
|
||||
r.raise_for_status()
|
||||
|
||||
self.invalidate_cache(path)
|
||||
|
||||
def _get_sha_from_cache(self, path):
|
||||
for entries in self.dircache.values():
|
||||
for entry in entries:
|
||||
entry_path = entry.get("name")
|
||||
if entry_path and entry_path == path and "sha" in entry:
|
||||
return entry["sha"]
|
||||
return None
|
||||
Reference in New Issue
Block a user