-
Notifications
You must be signed in to change notification settings - Fork 0
/
unpublish-datasets.sh
executable file
·66 lines (57 loc) · 2.22 KB
/
unpublish-datasets.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
#!/bin/bash
#
# Unpublish the datasets of one wiki: delete its dataset files from the
# published directory and drop its line from the wikis.txt listing.
#
# Required environment:
#   WIKI_ID - identifier of the wiki to unpublish; used below as a directory
#             name, a file name prefix and a line in the listing file.
set -ex

# WIKI_ID is appended to the publish directory as a path component, and files
# below the resulting directory are deleted recursively. Reject every value
# that could make that path resolve to something other than a single child
# directory, to avoid nuking the entire repo:
#   - empty           -> the child path would be the parent directory itself
#   - contains "*"    -> would act as a wildcard in the file name patterns
#   - contains "/"    -> could point outside the parent directory
#   - "." or ".."     -> resolve to the parent directory or to its parent
if [ -z "$WIKI_ID" ]; then
  echo "WIKI_ID must be set."
  exit 1
elif [[ $WIKI_ID == *"*"* ]]; then
  echo "WIKI_ID cannot have a '*' character in it."
  exit 1
elif [[ $WIKI_ID == */* || $WIKI_ID == "." || $WIKI_ID == ".." ]]; then
  echo "WIKI_ID cannot contain a '/' character or be '.' or '..'."
  exit 1
fi

# Directory holding one sub-directory per published wiki.
DATASET_PUBLISH_PARENT_DIRECTORY="/srv/published/datasets/one-off/research-mwaddlink"
# Directory holding the dataset files of the wiki being unpublished.
DATASET_PUBLISH_CHILD_DIRECTORY="$DATASET_PUBLISH_PARENT_DIRECTORY/$WIKI_ID"
# Index file listing the published wikis, one WIKI_ID per line.
DATASET_LISTING_FILE="$DATASET_PUBLISH_PARENT_DIRECTORY/wikis.txt"
# Delete every regular file below a directory whose name matches a pattern.
#
# Arguments:
#   $1 - directory to search (find descends into sub-directories)
#   $2 - file name pattern, handed to `find -name`
# Outputs:
#   One "<path> removed." line per deleted file on stdout, or a single
#   "No files found matching the pattern ..." line when nothing matched.
# Returns:
#   find's exit status when the search itself fails, so that a caller
#   running under `set -e` still aborts.
function remove_file() {
  local directory_path=$1
  local file_pattern=$2
  local matches
  local file

  # Declared separately from the assignment so find's exit status is not
  # masked by `local`.
  matches=$(find "$directory_path" -type f -name "$file_pattern" -print) || return

  if [ -z "$matches" ]; then
    echo "No files found matching the pattern '$file_pattern'."
    return 0
  fi

  # Consume the listing one line at a time instead of word-splitting it, so
  # paths that contain spaces or glob characters are deleted as-is.
  while IFS= read -r file; do
    # `--` keeps a path that starts with "-" from being parsed as an option.
    rm -f -- "$file"
    echo "$file removed."
  done <<< "$matches"
}
# Confirm whether the directory exists.
if [ -d "$DATASET_PUBLISH_CHILD_DIRECTORY" ]; then
  # Delete the directory content.
  # code below is not DRY because we avoided `rm -rf <directory>` for safety:
  # only the file names this script knows about are removed.
  remove_file "$DATASET_PUBLISH_CHILD_DIRECTORY/" "*.sql.gz"
  remove_file "$DATASET_PUBLISH_CHILD_DIRECTORY/" "*.sqlite.gz"
  remove_file "$DATASET_PUBLISH_CHILD_DIRECTORY/" "$WIKI_ID.linkmodel.json"
  remove_file "$DATASET_PUBLISH_CHILD_DIRECTORY/" "*.checksum"
  remove_file "$DATASET_PUBLISH_CHILD_DIRECTORY/" "README"

  # Delete the directory only if it is empty. `ls -A` also lists dotfiles, so
  # any leftover entry keeps the directory in place for manual inspection.
  if [ -n "$(ls -A -- "$DATASET_PUBLISH_CHILD_DIRECTORY")" ]; then
    echo "$DATASET_PUBLISH_CHILD_DIRECTORY is not empty, check its content before removing. \
If the remaining files are not one-offs, please add them to this unpublish script."
  else
    echo "$DATASET_PUBLISH_CHILD_DIRECTORY is empty, removing ..."
    rmdir -- "$DATASET_PUBLISH_CHILD_DIRECTORY"
    echo "$WIKI_ID has been removed from the published datasets repo."
  fi
fi

# Confirm whether the WIKI_ID exists in the index.
# -x matches whole lines only and -F treats WIKI_ID as a fixed string, so
# regex metacharacters in the id cannot match another wiki's line.
if grep -qxF -- "$WIKI_ID" "$DATASET_LISTING_FILE"; then
  # Remove the WIKI_ID from the index. Backslash-escape the characters that
  # are special in a sed basic regular expression (and the "/" delimiter) so
  # that only the exact line is deleted.
  escaped_wiki_id=$(printf '%s' "$WIKI_ID" | sed 's|[][\.*^$/]|\\&|g')
  sed -i "/^${escaped_wiki_id}\$/d" "$DATASET_LISTING_FILE"
  echo "$WIKI_ID has been delisted from the index."
fi