From eb223f38b01b65520645cee3c72fea8a295dd781 Mon Sep 17 00:00:00 2001 From: Mohammad Rafiq Date: Thu, 6 Feb 2025 15:48:34 +0800 Subject: [PATCH 1/7] feat(consolidate): init the script and check executableness --- bin/consolidate | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) create mode 100755 bin/consolidate diff --git a/bin/consolidate b/bin/consolidate new file mode 100755 index 0000000..52dec55 --- /dev/null +++ b/bin/consolidate @@ -0,0 +1,38 @@ +#!/bin/bash + +# Start the process. +echo "Consolidating..." + +# Check for input: +# If the user provides any arguments, store those as the list of file extensions to look for. +# If no arguments are given, set a default list of extensions such as "txt", "sh", "md", and "csv". + +# Prepare a temporary file: +# Create a temporary file that will hold patterns from the ".gitignore" file. + +# Read the .gitignore file: +# If the ".gitignore" file is present, read it and pull out lines that are not comments or blank. +# Store these patterns in the temporary file. + +# Create an output file: +# Get the current date and time, and create an output file named using this timestamp. + +# Find files: +# Look for all files in the current directory and its subdirectories, +# excluding the ".gitignore" file itself. +# Filter these files based on the specified extensions. +# Exclude any files that appear to be binary. +# Exclude any files that match patterns listed in the ".gitignore". + +# Process each file: +# For each file that matches the criteria: +# - Write a header to the output file that includes the filename. +# - Add the content of the file to the output file. +# - Finish with a closing code block marker. + +# Clean up: +# Delete the temporary file holding the patterns from the ".gitignore". + +# Complete the process: +# Inform the user that the operation is complete and let them know where the consolidated output has been saved. +echo "Consolidation complete! See your output file at: " \ No newline at end of file From 242524f1a5049cd47fe137312afa636e820938ab Mon Sep 17 00:00:00 2001 From: Mohammad Rafiq Date: Thu, 6 Feb 2025 16:29:20 +0800 Subject: [PATCH 2/7] feat(consolidate): add extension filtering --- bin/consolidate | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/bin/consolidate b/bin/consolidate index 52dec55..4a38370 100755 --- a/bin/consolidate +++ b/bin/consolidate @@ -7,6 +7,7 @@ echo "Consolidating..." # If the user provides any arguments, store those as the list of file extensions to look for. # If no arguments are given, set a default list of extensions such as "txt", "sh", "md", and "csv". +EXTENSIONS=() # Prepare a temporary file: # Create a temporary file that will hold patterns from the ".gitignore" file. @@ -17,9 +18,23 @@ echo "Consolidating..." # Create an output file: # Get the current date and time, and create an output file named using this timestamp. -# Find files: +### Find files: + +# The regex pattern starts with '.*', which searches for any matching characters. +# '.' is a wildcard, while '*' means match as many occurrences of the preceding char. +# '\.' escapes the '.' special char to search for the actual char. +# () is a capturing group. The | character is an OR operator +# '$' indicates the preceding string should be followed by the end of line +if [ ${#EXTENSIONS[@]} -gt 0 ]; then + EXTENSIONS_PATTERN=$(printf '\\|%s' "${EXTENSIONS[@]}") + REGEX_EXPRESSION=".*\.\($EXTENSIONS_PATTERN\)$" + echo "Regex Expression: $REGEX_EXPRESSION" +else + REGEX_EXPRESSION=".*$" +fi # Look for all files in the current directory and its subdirectories, # excluding the ".gitignore" file itself. +find . -regex $REGEX_EXPRESSION # Filter these files based on the specified extensions. # Exclude any files that appear to be binary. # Exclude any files that match patterns listed in the ".gitignore". From 3764a8902cedeadf531ac7434dc7599398fd5c6e Mon Sep 17 00:00:00 2001 From: Mohammad Rafiq Date: Thu, 6 Feb 2025 16:41:02 +0800 Subject: [PATCH 3/7] feat(consolidate): add command line args to add whitelisted extensions --- bin/consolidate | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/bin/consolidate b/bin/consolidate index 4a38370..82ac66a 100755 --- a/bin/consolidate +++ b/bin/consolidate @@ -5,9 +5,20 @@ echo "Consolidating..." # Check for input: # If the user provides any arguments, store those as the list of file extensions to look for. -# If no arguments are given, set a default list of extensions such as "txt", "sh", "md", and "csv". +# If no arguments are given, leave extensions array empty to let the regex pattern search for everything. +if [ $# -gt 0 ]; then + echo "$# arguments entered" + EXTENSIONS=() + for var in $@ + do + EXTENSIONS+=("${var}") + done +else + echo "0 arguments entered. Searching all files" + EXTENSIONS=() +fi +echo ${EXTENSIONS} -EXTENSIONS=() # Prepare a temporary file: # Create a temporary file that will hold patterns from the ".gitignore" file. @@ -28,10 +39,10 @@ EXTENSIONS=() if [ ${#EXTENSIONS[@]} -gt 0 ]; then EXTENSIONS_PATTERN=$(printf '\\|%s' "${EXTENSIONS[@]}") REGEX_EXPRESSION=".*\.\($EXTENSIONS_PATTERN\)$" - echo "Regex Expression: $REGEX_EXPRESSION" else REGEX_EXPRESSION=".*$" fi +echo "Regex Expression: $REGEX_EXPRESSION" # Look for all files in the current directory and its subdirectories, # excluding the ".gitignore" file itself. find . -regex $REGEX_EXPRESSION From b3c33a0ca3e46e0b13775c4adc4bc9b8d1c99899 Mon Sep 17 00:00:00 2001 From: Mohammad Rafiq Date: Thu, 6 Feb 2025 16:49:12 +0800 Subject: [PATCH 4/7] feat(consolidate): add period stripping from extensions in args --- bin/consolidate | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/bin/consolidate b/bin/consolidate index 82ac66a..35270cc 100755 --- a/bin/consolidate +++ b/bin/consolidate @@ -11,13 +11,15 @@ if [ $# -gt 0 ]; then EXTENSIONS=() for var in $@ do + # Strip the . from the given extension if user gives the extension with a period (like .md) + var=${var/"."/} EXTENSIONS+=("${var}") done else echo "0 arguments entered. Searching all files" EXTENSIONS=() fi -echo ${EXTENSIONS} +echo ${EXTENSIONS[@]} # Prepare a temporary file: # Create a temporary file that will hold patterns from the ".gitignore" file. From 1929e44f38af8202cc29913084744c8062da7c25 Mon Sep 17 00:00:00 2001 From: Mohammad Rafiq Date: Thu, 6 Feb 2025 19:22:48 +0800 Subject: [PATCH 5/7] feat(consolidate): finished feature --- bin/consolidate | 155 ++++++++++++++++++++++++++++++------------------ 1 file changed, 98 insertions(+), 57 deletions(-) diff --git a/bin/consolidate b/bin/consolidate index 35270cc..ce0e6f0 100755 --- a/bin/consolidate +++ b/bin/consolidate @@ -1,65 +1,106 @@ #!/bin/bash +# Function to get the list of extensions from the arguments passed to the script +get_extensions() { + # Check for input: + # If the user provides any arguments, store those as the list of file extensions to look for. + # If no arguments are given, leave extensions array empty to let the regex pattern search for everything. + if [ $# -gt 0 ]; then + EXTENSIONS=() + for var in $@ + do + # Strip the . from the given extension if user gives the extension with a period (like .md) + var=${var/"."/} + EXTENSIONS+=("${var}") + done + else + EXTENSIONS=() + fi +} + +# Function to get the current date in YYYY-MM-DD format +get_current_date() { + date +%F +} + +# Function to ignore files based on .gitignore +is_ignored() { + local filepath="$1" + git check-ignore -q "$filepath" # Returns 0 if ignored, 1 otherwise + return $? +} + +# Function to process a single file +process_file() { + local filepath="$1" + local header="# $filepath" + { + echo "$header" + echo "" # Print a blank line + cat "$filepath" + echo "" # Print a blank line after file content + } >> "$output_file" +} + +# Function to traverse directories recursively +traverse_directory() { + local dir="$1" + + # The regex pattern starts with '.*', which searches for any matching characters + # '.' is a wildcard, while '*' means match as many occurrences of the preceding char + # '\.' escapes the '.' special char to search for the actual char + # () is a capturing group. The | character is an OR operator + # '$' indicates the preceding string should be followed by the end of line + if [ ${#EXTENSIONS[@]} -gt 0 ]; then + EXTENSIONS_PATTERN=$(printf '\\|%s' "${EXTENSIONS[@]}") + pattern=".*\.\($EXTENSIONS_PATTERN\)$" + else + pattern=".*$" + fi + + # Loop through each item in the directory + for entry in "$dir"/*; do + # Check if entry exists (handles empty directories) + if [[ -e "$entry" ]]; then + if [[ -d "$entry" ]]; then + # Check if the directory is .git + if [[ "$(basename "$entry")" != ".git" ]]; then + # Recursively traverse the directory + traverse_directory "$entry" + fi + elif [[ -f "$entry" ]]; then + # Ignore .gitignore files and files in .gitignore + if [[ "$(basename "$entry")" != ".gitignore" ]] && + ! is_ignored "$entry" && + [[ "$entry" =~ $pattern ]]; then # Check against regex pattern + process_file "$entry" + fi + fi + fi + done +} + + +# Main function +main() { + get_extensions + + output_file="output-$(get_current_date).txt" + + # Clear previous output file (if exists) + > "$output_file" + + # Start traversing from the current directory + traverse_directory "." + + echo "Consolidation complete! Output saved in: $output_file" +} + # Start the process. echo "Consolidating..." -# Check for input: -# If the user provides any arguments, store those as the list of file extensions to look for. -# If no arguments are given, leave extensions array empty to let the regex pattern search for everything. -if [ $# -gt 0 ]; then - echo "$# arguments entered" - EXTENSIONS=() - for var in $@ - do - # Strip the . from the given extension if user gives the extension with a period (like .md) - var=${var/"."/} - EXTENSIONS+=("${var}") - done -else - echo "0 arguments entered. Searching all files" - EXTENSIONS=() -fi -echo ${EXTENSIONS[@]} - -# Prepare a temporary file: -# Create a temporary file that will hold patterns from the ".gitignore" file. - -# Read the .gitignore file: -# If the ".gitignore" file is present, read it and pull out lines that are not comments or blank. -# Store these patterns in the temporary file. - -# Create an output file: -# Get the current date and time, and create an output file named using this timestamp. - -### Find files: - -# The regex pattern starts with '.*', which searches for any matching characters. -# '.' is a wildcard, while '*' means match as many occurrences of the preceding char. -# '\.' escapes the '.' special char to search for the actual char. -# () is a capturing group. The | character is an OR operator -# '$' indicates the preceding string should be followed by the end of line -if [ ${#EXTENSIONS[@]} -gt 0 ]; then - EXTENSIONS_PATTERN=$(printf '\\|%s' "${EXTENSIONS[@]}") - REGEX_EXPRESSION=".*\.\($EXTENSIONS_PATTERN\)$" -else - REGEX_EXPRESSION=".*$" -fi -echo "Regex Expression: $REGEX_EXPRESSION" -# Look for all files in the current directory and its subdirectories, -# excluding the ".gitignore" file itself. -find . -regex $REGEX_EXPRESSION -# Filter these files based on the specified extensions. -# Exclude any files that appear to be binary. -# Exclude any files that match patterns listed in the ".gitignore". - -# Process each file: -# For each file that matches the criteria: -# - Write a header to the output file that includes the filename. -# - Add the content of the file to the output file. -# - Finish with a closing code block marker. - -# Clean up: -# Delete the temporary file holding the patterns from the ".gitignore". +# Execute main function +main # Complete the process: # Inform the user that the operation is complete and let them know where the consolidated output has been saved. From b0236cc79646d6e5ab85e1a2847b693ef19c71af Mon Sep 17 00:00:00 2001 From: Mohammad Rafiq Date: Thu, 6 Feb 2025 19:26:36 +0800 Subject: [PATCH 6/7] refactor(consolidate): clean up script --- bin/consolidate | 90 ++++++++++++++++++++----------------------------- 1 file changed, 36 insertions(+), 54 deletions(-) diff --git a/bin/consolidate b/bin/consolidate index ce0e6f0..311854b 100755 --- a/bin/consolidate +++ b/bin/consolidate @@ -1,107 +1,89 @@ #!/bin/bash -# Function to get the list of extensions from the arguments passed to the script +# Functions: + +# Get the list of specified file extensions from script arguments get_extensions() { - # Check for input: - # If the user provides any arguments, store those as the list of file extensions to look for. - # If no arguments are given, leave extensions array empty to let the regex pattern search for everything. if [ $# -gt 0 ]; then + # Create an array for extensions EXTENSIONS=() - for var in $@ - do - # Strip the . from the given extension if user gives the extension with a period (like .md) - var=${var/"."/} - EXTENSIONS+=("${var}") + for ext in "$@"; do + # Remove the leading dot if present + ext="${ext#.}" + EXTENSIONS+=("$ext") done else - EXTENSIONS=() + EXTENSIONS=() # No extensions specified, match everything fi } -# Function to get the current date in YYYY-MM-DD format +# Get the current date in YYYY-MM-DD format get_current_date() { date +%F } -# Function to ignore files based on .gitignore +# Check if a file is ignored according to .gitignore is_ignored() { local filepath="$1" git check-ignore -q "$filepath" # Returns 0 if ignored, 1 otherwise - return $? } -# Function to process a single file +# Process a single file: append its header and content to the output file process_file() { local filepath="$1" - local header="# $filepath" { - echo "$header" - echo "" # Print a blank line - cat "$filepath" - echo "" # Print a blank line after file content + echo "# $filepath" # File header + echo "" # Blank line + cat "$filepath" # File content + echo "" # Blank line after content } >> "$output_file" } -# Function to traverse directories recursively +# Recursively traverse directories to find and process files traverse_directory() { local dir="$1" - # The regex pattern starts with '.*', which searches for any matching characters - # '.' is a wildcard, while '*' means match as many occurrences of the preceding char - # '\.' escapes the '.' special char to search for the actual char - # () is a capturing group. The | character is an OR operator - # '$' indicates the preceding string should be followed by the end of line + # Construct regex pattern based on provided extensions if [ ${#EXTENSIONS[@]} -gt 0 ]; then EXTENSIONS_PATTERN=$(printf '\\|%s' "${EXTENSIONS[@]}") - pattern=".*\.\($EXTENSIONS_PATTERN\)$" + pattern=".*\.\($EXTENSIONS_PATTERN\)$" # Pattern for matching specified extensions else - pattern=".*$" + pattern=".*$" # Matches everything if no extensions specified fi - # Loop through each item in the directory + # Loop through entries in the specified directory for entry in "$dir"/*; do - # Check if entry exists (handles empty directories) - if [[ -e "$entry" ]]; then - if [[ -d "$entry" ]]; then - # Check if the directory is .git + if [[ -e "$entry" ]]; then # Check if the entry exists + if [[ -d "$entry" ]]; then # Process directories if [[ "$(basename "$entry")" != ".git" ]]; then - # Recursively traverse the directory - traverse_directory "$entry" + traverse_directory "$entry" # Recursive call fi - elif [[ -f "$entry" ]]; then - # Ignore .gitignore files and files in .gitignore + elif [[ -f "$entry" ]]; then # Process files if [[ "$(basename "$entry")" != ".gitignore" ]] && - ! is_ignored "$entry" && - [[ "$entry" =~ $pattern ]]; then # Check against regex pattern - process_file "$entry" + ! is_ignored "$entry" && + [[ "$entry" =~ $pattern ]]; then # Check if file matches regex + process_file "$entry" # Process the file fi fi fi done } - -# Main function +# Main function to execute the script logic main() { - get_extensions + get_extensions "$@" # Pass all script arguments to get_extensions function - output_file="output-$(get_current_date).txt" + output_file="output-$(get_current_date).txt" # Define the output file name - # Clear previous output file (if exists) + # Clear previous output file if it exists > "$output_file" - # Start traversing from the current directory + # Start the file traversal from the current directory traverse_directory "." - echo "Consolidation complete! Output saved in: $output_file" + echo "Consolidation complete! Output saved in: $output_file" # Completion message } -# Start the process. +# Execution begins here echo "Consolidating..." - -# Execute main function -main - -# Complete the process: -# Inform the user that the operation is complete and let them know where the consolidated output has been saved. -echo "Consolidation complete! See your output file at: " \ No newline at end of file +main "$@" # Invoke main function with script arguments \ No newline at end of file From bd34dd7fe3a7ea745ce2723b85cb2a3601ce1815 Mon Sep 17 00:00:00 2001 From: Mohammad Rafiq Date: Thu, 6 Feb 2025 19:28:17 +0800 Subject: [PATCH 7/7] refactor(consolidate): add filename exclusions --- bin/consolidate | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/bin/consolidate b/bin/consolidate index 311854b..0e6e8ad 100755 --- a/bin/consolidate +++ b/bin/consolidate @@ -51,6 +51,9 @@ traverse_directory() { pattern=".*$" # Matches everything if no extensions specified fi + # Array of filenames to exclude from processing + local EXCLUDED_FILES=("LICENSE" ".gitignore") + # Loop through entries in the specified directory for entry in "$dir"/*; do if [[ -e "$entry" ]]; then # Check if the entry exists @@ -59,9 +62,19 @@ traverse_directory() { traverse_directory "$entry" # Recursive call fi elif [[ -f "$entry" ]]; then # Process files - if [[ "$(basename "$entry")" != ".gitignore" ]] && + # Check if the file should be excluded + local exclude=false + for excluded in "${EXCLUDED_FILES[@]}"; do + if [[ "$(basename "$entry")" == "$excluded" ]]; then + exclude=true + break + fi + done + + # Process the file only if it's not excluded, not ignored, and matches the pattern + if [[ "$exclude" == false ]] && ! is_ignored "$entry" && - [[ "$entry" =~ $pattern ]]; then # Check if file matches regex + [[ "$entry" =~ $pattern ]]; then process_file "$entry" # Process the file fi fi