feat(git-extract): make git extract work with whitelisting files
This commit is contained in:
parent
17aa39b73c
commit
f201190ef1
1 changed file with 43 additions and 23 deletions
|
@ -166,21 +166,30 @@ def get_latest_text_files_to_stdout(remote_repo_url=None, ignored_files=None):
|
||||||
working directory (if no URL is provided) to a temporary folder,
|
working directory (if no URL is provided) to a temporary folder,
|
||||||
and then prints the contents of all files identified as text files to stdout,
|
and then prints the contents of all files identified as text files to stdout,
|
||||||
prepended by their relative paths from the repository root, excluding specified
|
prepended by their relative paths from the repository root, excluding specified
|
||||||
ignored files.
|
ignored files. Supports "!" to specify includes only.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
remote_repo_url: The URL of the remote Git repository (optional). If None,
|
remote_repo_url: The URL of the remote Git repository (optional). If None,
|
||||||
the current working directory is assumed to be a Git repo.
|
the current working directory is assumed to be a Git repo.
|
||||||
ignored_files: A list of files or directories to ignore (relative to the repo root).
|
ignored_files: A list of files or directories to ignore (relative to the repo root).
|
||||||
|
If a list contains a value starting with "!", it means "include only".
|
||||||
"""
|
"""
|
||||||
|
|
||||||
temp_dir = None
|
temp_dir = None
|
||||||
if ignored_files is None:
|
if ignored_files is None:
|
||||||
ignored_files = []
|
ignored_files = []
|
||||||
|
|
||||||
# Ensure .git and .gitignore are always ignored
|
# Ensure .git and .gitignore are always ignored (unless include only is specified)
|
||||||
ignored_files.extend([".git", ".gitignore"])
|
include_only = any(item.startswith("!") for item in ignored_files)
|
||||||
ignored_files = list(set(ignored_files)) # remove duplicates
|
if not include_only:
|
||||||
|
ignored_files.extend([".git", ".gitignore"])
|
||||||
|
ignored_files = list(set(ignored_files)) # remove duplicates
|
||||||
|
|
||||||
|
# Determine if "include only" is active and extract the include paths
|
||||||
|
include_only = any(item.startswith("!") for item in ignored_files)
|
||||||
|
include_paths = [item[1:] for item in ignored_files if item.startswith("!")]
|
||||||
|
ignore_paths = [item for item in ignored_files if not item.startswith("!")]
|
||||||
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
# Create a temporary directory
|
# Create a temporary directory
|
||||||
|
@ -215,25 +224,36 @@ def get_latest_text_files_to_stdout(remote_repo_url=None, ignored_files=None):
|
||||||
file_path = os.path.join(root, file)
|
file_path = os.path.join(root, file)
|
||||||
relative_path = os.path.relpath(file_path, temp_dir)
|
relative_path = os.path.relpath(file_path, temp_dir)
|
||||||
|
|
||||||
# Check if the file or any of its parent directories are ignored
|
if include_only:
|
||||||
ignore = False
|
# Include only logic
|
||||||
path_components = relative_path.split(
|
include = False
|
||||||
os.sep
|
for include_path in include_paths:
|
||||||
) # split based on OS-specific path separator
|
if relative_path.startswith(include_path):
|
||||||
current_path = ""
|
include = True
|
||||||
for component in path_components:
|
break
|
||||||
current_path = (
|
if not include:
|
||||||
os.path.join(current_path, component)
|
continue # Skip if not in include paths
|
||||||
if current_path
|
else:
|
||||||
else component
|
# Ignore logic (standard ignore)
|
||||||
) # prevent empty first join
|
ignore = False
|
||||||
if current_path in ignored_files:
|
path_components = relative_path.split(
|
||||||
ignore = True
|
os.sep
|
||||||
break
|
) # split based on OS-specific path separator
|
||||||
|
current_path = ""
|
||||||
|
for component in path_components:
|
||||||
|
current_path = (
|
||||||
|
os.path.join(current_path, component)
|
||||||
|
if current_path
|
||||||
|
else component
|
||||||
|
) # prevent empty first join
|
||||||
|
if current_path in ignore_paths:
|
||||||
|
ignore = True
|
||||||
|
break
|
||||||
|
if ignore:
|
||||||
|
continue
|
||||||
|
|
||||||
if not ignore:
|
if is_text_file(file_path): # Use the is_text_file function
|
||||||
if is_text_file(file_path): # Use the is_text_file function
|
text_files.append(file_path)
|
||||||
text_files.append(file_path)
|
|
||||||
|
|
||||||
# Print the contents of each text file, prepended by its relative path
|
# Print the contents of each text file, prepended by its relative path
|
||||||
for file_path in text_files:
|
for file_path in text_files:
|
||||||
|
@ -275,7 +295,7 @@ if __name__ == "__main__":
|
||||||
"--ignored-files",
|
"--ignored-files",
|
||||||
nargs="+",
|
nargs="+",
|
||||||
default=[],
|
default=[],
|
||||||
help="Files or directories to ignore (space-separated).",
|
help="Files or directories to ignore (space-separated). Use !<path> to specify include only.",
|
||||||
)
|
)
|
||||||
|
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue