feat(git-extract): add local repo support
This commit is contained in:
parent
aeb847aa47
commit
17aa39b73c
1 changed files with 42 additions and 21 deletions
|
@ -157,18 +157,20 @@ def is_text_file(file_path, aggressive=False):
|
||||||
# Run checks and return True if any of them pass
|
# Run checks and return True if any of them pass
|
||||||
high_entropy_check = not has_high_entropy(file_path)
|
high_entropy_check = not has_high_entropy(file_path)
|
||||||
chardet_check = check_chardet_encoding(file_path)
|
chardet_check = check_chardet_encoding(file_path)
|
||||||
return ascii_check or high_entropy_check or chardet_chec
|
return ascii_check or high_entropy_check or chardet_check
|
||||||
|
|
||||||
|
|
||||||
def get_latest_text_files_to_stdout(remote_repo_url, ignored_files=None):
|
def get_latest_text_files_to_stdout(remote_repo_url=None, ignored_files=None):
|
||||||
"""
|
"""
|
||||||
Checks out the latest commit from a remote Git repository to a temporary folder,
|
Checks out the latest commit from a remote Git repository or the current
|
||||||
|
working directory (if no URL is provided) to a temporary folder,
|
||||||
and then prints the contents of all files identified as text files to stdout,
|
and then prints the contents of all files identified as text files to stdout,
|
||||||
prepended by their relative paths from the repository root, excluding specified
|
prepended by their relative paths from the repository root, excluding specified
|
||||||
ignored files.
|
ignored files.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
remote_repo_url: The URL of the remote Git repository.
|
remote_repo_url: The URL of the remote Git repository (optional). If None,
|
||||||
|
the current working directory is assumed to be a Git repo.
|
||||||
ignored_files: A list of files or directories to ignore (relative to the repo root).
|
ignored_files: A list of files or directories to ignore (relative to the repo root).
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
@ -185,12 +187,26 @@ def get_latest_text_files_to_stdout(remote_repo_url, ignored_files=None):
|
||||||
temp_dir = tempfile.mkdtemp()
|
temp_dir = tempfile.mkdtemp()
|
||||||
|
|
||||||
# Clone the repository, but only the latest commit (shallow clone)
|
# Clone the repository, but only the latest commit (shallow clone)
|
||||||
subprocess.run(
|
clone_command = ["git", "clone", "--depth", "1"]
|
||||||
["git", "clone", "--depth", "1", remote_repo_url, temp_dir],
|
if remote_repo_url:
|
||||||
check=True,
|
clone_command.extend([remote_repo_url, temp_dir])
|
||||||
capture_output=True,
|
else:
|
||||||
text=True,
|
# Check if the current directory is a Git repository.
|
||||||
)
|
try:
|
||||||
|
subprocess.run(
|
||||||
|
["git", "rev-parse", "--is-inside-work-tree"],
|
||||||
|
check=True,
|
||||||
|
capture_output=True,
|
||||||
|
text=True,
|
||||||
|
cwd=os.getcwd(),
|
||||||
|
) # run in current directory
|
||||||
|
except subprocess.CalledProcessError:
|
||||||
|
raise ValueError(
|
||||||
|
"No Git repository URL provided and current directory is not a Git repository."
|
||||||
|
)
|
||||||
|
clone_command.extend([os.getcwd(), temp_dir]) # clone current dir to temp
|
||||||
|
|
||||||
|
subprocess.run(clone_command, check=True, capture_output=True, text=True)
|
||||||
|
|
||||||
# Find all files and filter for text files
|
# Find all files and filter for text files
|
||||||
text_files = []
|
text_files = []
|
||||||
|
@ -234,6 +250,8 @@ def get_latest_text_files_to_stdout(remote_repo_url, ignored_files=None):
|
||||||
|
|
||||||
except subprocess.CalledProcessError as e:
|
except subprocess.CalledProcessError as e:
|
||||||
print(f"Error executing Git command: {e.stderr}")
|
print(f"Error executing Git command: {e.stderr}")
|
||||||
|
except ValueError as e:
|
||||||
|
print(e)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f"An error occurred: {e}")
|
print(f"An error occurred: {e}")
|
||||||
finally:
|
finally:
|
||||||
|
@ -247,7 +265,10 @@ if __name__ == "__main__":
|
||||||
description="Checkout and print text files from a remote Git repository."
|
description="Checkout and print text files from a remote Git repository."
|
||||||
)
|
)
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"-r", "--repo", required=True, help="The URL of the remote Git repository."
|
"-r",
|
||||||
|
"--repo",
|
||||||
|
required=False,
|
||||||
|
help="The URL of the remote Git repository. If not provided, the current directory is used if it's a Git repository.",
|
||||||
)
|
)
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"-i",
|
"-i",
|
||||||
|
@ -262,15 +283,15 @@ if __name__ == "__main__":
|
||||||
remote_repository_url = args.repo
|
remote_repository_url = args.repo
|
||||||
ignored_files = args.ignored_files
|
ignored_files = args.ignored_files
|
||||||
|
|
||||||
# Verify the URL
|
# Verify the URL only if it's provided
|
||||||
if (
|
if remote_repository_url:
|
||||||
"github.com" not in remote_repository_url
|
if (
|
||||||
and "gitlab.com" not in remote_repository_url
|
"github.com" not in remote_repository_url
|
||||||
and "bitbucket.org" not in remote_repository_url
|
and "gitlab.com" not in remote_repository_url
|
||||||
):
|
and "bitbucket.org" not in remote_repository_url
|
||||||
print(
|
):
|
||||||
"Warning: This script is designed for common public repository hosting providers. Ensure the Git URL is correct."
|
print(
|
||||||
)
|
"Warning: This script is designed for common public repository hosting providers. Ensure the Git URL is correct."
|
||||||
|
)
|
||||||
|
|
||||||
get_latest_text_files_to_stdout(remote_repository_url, ignored_files)
|
get_latest_text_files_to_stdout(remote_repository_url, ignored_files)
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue