file_utils
[ allennlp.common.file_utils ]
Utilities for working with the local dataset cache.
CACHE_ROOT#
CACHE_ROOT = Path(os.getenv("ALLENNLP_CACHE_ROOT", Path.home() / ".allennlp"))
CACHE_DIRECTORY#
CACHE_DIRECTORY = str(CACHE_ROOT / "cache")
DEPRECATED_CACHE_DIRECTORY#
DEPRECATED_CACHE_DIRECTORY = str(CACHE_ROOT / "datasets")
DATASET_CACHE#
DATASET_CACHE = CACHE_DIRECTORY
url_to_filename#
def url_to_filename(url: str, etag: str = None) -> str
Convert url
into a hashed filename in a repeatable way.
If etag
is specified, append its hash to the url's, delimited
by a period.
filename_to_url#
def filename_to_url(
filename: str,
cache_dir: str = None
) -> Tuple[str, str]
Return the url and etag (which may be None
) stored for filename
.
Raise FileNotFoundError
if filename
or its stored metadata do not exist.
cached_path#
def cached_path(
url_or_filename: Union[str, Path],
cache_dir: str = None
) -> str
Given something that might be a URL (or might be a local path), determine which. If it's a URL, download the file and cache it, and return the path to the cached file. If it's already a local path, make sure the file exists and then return the path.
is_url_or_existing_file#
def is_url_or_existing_file(
url_or_filename: Union[str, Path, None]
) -> bool
Given something that might be a URL (or might be a local path), check whether it's a URL or an existing file path.
CacheFile Objects#
class CacheFile():
| def __init__(
| self,
| cache_filename: Union[Path, str],
| mode="w+b"
| ) -> None
This is a context manager that makes robust caching easier.
On __enter__
, an IO handle to a temporary file is returned, which can
be treated as if it's the actual cache file.
On __exit__
, the temporary file is renamed to the cache file. If anything
goes wrong while writing to the temporary file, it will be removed.
get_from_cache#
def get_from_cache(url: str, cache_dir: str = None) -> str
Given a URL, look for the corresponding dataset in the local cache. If it's not there, download it. Then return the path to the cached file.
read_set_from_file#
def read_set_from_file(filename: str) -> Set[str]
Extract a de-duped collection (set) of text from a file. Expected file format is one item per line.
get_file_extension#
def get_file_extension(path: str, dot=True, lower: bool = True)
open_compressed#
def open_compressed(
filename: Union[str, Path],
mode: str = "rt",
encoding: Optional[str] = "UTF-8",
**kwargs
)