#!/bin/bash
#
# Collects front-matter metadata from the guides' index.md files and writes
# it to urls.csv next to this script.

# Abort on command failure, unset variables and failed pipeline stages.
set -euo pipefail

# Get the absolute path to where the script is located
SCRIPT_DIR="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd)"

# Define the path to parse, assuming it's always in "/docs/docs/guides" relative to the repository root
GUIDES_DIR="$(cd -- "${SCRIPT_DIR}/../.." && pwd)/docs/guides"

# Define the output CSV file in the script's directory
OUTPUT_FILE="${SCRIPT_DIR}/urls.csv"

# Define the base URL
BASE_URL="https://www.linode.com/docs/guides/"

# Initialize the CSV file with a header (truncates any previous output)
printf '%s\n' \
  "URL,Title,Description,Keyword(s),Deprecation Status,Published Date,Updated Date" \
  > "$OUTPUT_FILE"

# Function to extract and trim a given field from a line
#######################################
# Strips the leading "<key>:" from a front-matter line and prints the value.
# Arguments:
#   $1 - the full line, e.g. 'title: "My Guide"'
#   $2 - the key name, e.g. 'title' (matched literally, not as a pattern)
# Outputs:
#   The value on stdout with surrounding whitespace removed and one pair of
#   matching surrounding quotes (single or double) removed.
#######################################
extract_field() {
  local line=$1
  local key=$2
  local value

  # Quoting "$key" keeps it literal; a line without the prefix is left as is.
  value=${line#"$key":}

  # Trim leading, then trailing, whitespace without spawning a process.
  value=${value#"${value%%[![:space:]]*}"}
  value=${value%"${value##*[![:space:]]}"}

  # Drop one pair of enclosing quotes; anything inside is kept verbatim, so
  # apostrophes and backslashes in the value survive.
  if (( ${#value} >= 2 )); then
    case $value in
      \"*\" | \'*\')
        value=${value:1:${#value}-2}
        ;;
    esac
  fi

  printf '%s\n' "$value"
}

# Function to sanitize the description field
#######################################
# Makes a description safe to place inside a double-quoted CSV field.
# Arguments:
#   $1 - the raw description text
# Outputs:
#   The text on stdout with every double quote replaced by a single quote,
#   runs of whitespace collapsed to one space, and the ends trimmed.
#######################################
sanitize_description() {
  local text=$1
  local dq='"'
  local sq="'"
  local -a words
  # Local IFS: used by read for splitting and by "${words[*]}" for joining.
  local IFS=$' \t\n'

  text=${text//"$dq"/$sq}

  # Splitting into words and re-joining collapses and trims whitespace.
  # read -r keeps backslashes literal and performs no globbing.
  words=()
  read -r -a words <<< "$text" || true

  printf '%s\n' "${words[*]-}"
}

# Function to clean and format the keywords field
#######################################
# Turns a front-matter keyword list into a plain ", "-separated string.
# Arguments:
#   $1 - the raw keywords value, e.g. "['linux', 'nginx']"
# Outputs:
#   The keywords on stdout: square brackets removed, each comma-separated
#   item trimmed and stripped of one pair of enclosing quotes, empty items
#   dropped, items joined with ", ".
#######################################
format_keywords() {
  local rest=$1
  local item
  local joined=''

  rest=${rest//\[/}
  rest=${rest//\]/}

  while [[ -n $rest ]]; do
    # Peel off everything up to the next comma (or the remainder).
    item=${rest%%,*}
    if [[ $rest == *,* ]]; then
      rest=${rest#*,}
    else
      rest=''
    fi

    # Trim surrounding whitespace.
    item=${item#"${item%%[![:space:]]*}"}
    item=${item%"${item##*[![:space:]]}"}

    # Remove one pair of matching quotes around the item.
    if (( ${#item} >= 2 )); then
      case $item in
        \"*\" | \'*\')
          item=${item:1:${#item}-2}
          ;;
      esac
    fi

    [[ -n $item ]] || continue
    joined+="${joined:+, }${item}"
  done

  printf '%s\n' "$joined"
}

# Function to parse fields and build the CSV
#######################################
# Walks a directory tree, reads the metadata of every index.md and appends
# one CSV row per guide that has a slug.
# Globals:
#   BASE_URL    (read)  - prefix prepended to the slug
#   OUTPUT_FILE (read)  - CSV file that rows are appended to
# Arguments:
#   $1 - directory to search
# Outputs:
#   Appends rows to OUTPUT_FILE; diagnostics go to stderr.
# Returns:
#   0 on success, 1 if the directory does not exist.
#######################################
parse_directory_recursively() {
  local dir=$1
  local file line line_no in_front_matter deprecated_value
  local slug title description keywords deprecated published_date updated_date
  local full_url field row
  local dq='"'

  if [[ ! -d $dir ]]; then
    printf 'error: guides directory not found: %s\n' "$dir" >&2
    return 1
  fi

  # Find all index.md files recursively in the directory, excluding specified
  # folders. NUL-delimited names keep unusual paths intact, and feeding the
  # loop via process substitution keeps it in the current shell.
  while IFS= read -r -d '' file; do
    if [[ ! -r $file ]]; then
      printf 'warning: skipping unreadable file: %s\n' "$file" >&2
      continue
    fi

    # Initialize default values
    slug=''
    title=''
    description=''
    keywords=''
    deprecated='false'
    published_date=''
    updated_date=''
    line_no=0
    in_front_matter=0

    # Extract fields from each line. IFS= keeps indentation, so only keys
    # that start in column 0 match and nested keys cannot overwrite them.
    while IFS= read -r line || [[ -n $line ]]; do
      line=${line%$'\r'} # tolerate CRLF line endings
      line_no=$((line_no + 1))

      # When the file opens with a '---' front-matter fence, stop at the
      # closing fence so body text is never mistaken for metadata. Files
      # without an opening fence are scanned in full.
      if [[ $line == '---' ]]; then
        if (( line_no == 1 )); then
          in_front_matter=1
          continue
        elif (( in_front_matter )); then
          break
        fi
      fi

      case $line in
        slug:*)
          slug=$(extract_field "$line" "slug")
          ;;
        title:*)
          title=$(extract_field "$line" "title")
          ;;
        description:*)
          description=$(extract_field "$line" "description")
          description=$(sanitize_description "$description")
          ;;
        keywords:*)
          keywords=$(extract_field "$line" "keywords")
          keywords=$(format_keywords "$keywords")
          ;;
        deprecated:*)
          deprecated_value=$(extract_field "$line" "deprecated")
          if [[ $deprecated_value == "true" ]]; then
            deprecated='true'
          fi
          ;;
        published:*)
          published_date=$(extract_field "$line" "published")
          ;;
        modified:*)
          updated_date=$(extract_field "$line" "modified")
          ;;
      esac
    done < "$file"

    # Guides without a slug have no URL and are left out of the CSV.
    [[ -n $slug ]] || continue

    # Construct the full URL without spaces
    full_url="${BASE_URL}${slug}"

    # Build the row: every field is double-quoted and embedded double quotes
    # are doubled so the row stays valid CSV.
    row=''
    for field in "$full_url" "$title" "$description" "$keywords" \
      "$deprecated" "$published_date" "$updated_date"; do
      field=${field//$dq/$dq$dq}
      row+="${row:+,}${dq}${field}${dq}"
    done

    # Append the data to the CSV file
    printf '%s\n' "$row" >> "$OUTPUT_FILE"
  done < <(
    find "$dir" \
      -type d \( -name "_shortguides" -o -name "concentrations" \
      -o -name "audiences" -o -name "linode-writers-formatting-guide" \) -prune \
      -o -type f -name "index.md" -print0
  )
}

# Parse the designated guides directory; stop here on failure so the
# success message below is only printed when the CSV was actually built.
parse_directory_recursively "$GUIDES_DIR" || exit 1

printf 'Data has been written to %s\n' "$OUTPUT_FILE"