-
-
Notifications
You must be signed in to change notification settings - Fork 714
/
unpax-icu.sh
executable file
·167 lines (145 loc) · 5.52 KB
/
unpax-icu.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
#!/usr/bin/qsh
# Copyright (C) 2016 and later: Unicode, Inc. and others.
# License & terms of use: http://www.unicode.org/copyright.html
# Copyright (C) 2000-2011, International Business Machines
# Corporation and others. All Rights Reserved.
#
# Authors:
# Ami Fixler
# Barry Novinger
# Steven R. Loomis
# George Rhoten
# Jason Spieth
#
# Shell script to unpax ICU and convert the files to an EBCDIC codepage.
# After extracting to EBCDIC, binary files are re-extracted without the
# EBCDIC conversion, thus restoring them to original codepage.
# Decide whether we are running under qsh, using QSH_VERSION as the marker.
# QSH is exported as 1 when QSH_VERSION is non-empty, otherwise as 0 (and a
# notice is printed that this is only a test run).
if [ -n "$QSH_VERSION" ]; then
  QSH=1
  #echo "QSH version $QSH_VERSION"
else
  QSH=0
  echo "QSH not detected (QSH_VERSION not set) - just testing."
fi
export QSH
# Extra pax flag used by the two main extraction passes:
# "v" lists files as they are unpacked (default).
VERBOSE_UNPACK="v"

#****************************************************************************
# binary_suffixes: file suffixes (extensions) that identify binary files.
# A generic list would look like:
#   binary_suffixes='ico ICO bmp BMP jpg JPG gif GIF brk BRK'
# The list below is the ICU specific one.
#****************************************************************************
binary_suffixes='brk BRK bin BIN res RES cnv CNV'
binary_suffixes="$binary_suffixes dat DAT icu ICU spp SPP"
binary_suffixes="$binary_suffixes xml XML nrm NRM utf16be UTF16BE"

# data_files: space separated pax patterns for the ICU data directories.
# The patterns are built up one directory per line; no globbing happens
# inside these assignments.
data_files='icu/source/data/brkitr/*'
data_files="$data_files icu/source/data/locales/*"
data_files="$data_files icu/source/data/coll/*"
data_files="$data_files icu/source/data/rbnf/*"
data_files="$data_files icu/source/data/mappings/*"
data_files="$data_files icu/source/data/misc/*"
data_files="$data_files icu/source/data/translit/*"
data_files="$data_files icu/source/data/unidata/*"
data_files="$data_files icu/source/test/testdata/*"
#****************************************************************************
# Function:    usage
# Description: Writes a one-line hint to stdout showing how to invoke this
#              script ($0) with the archive file name as its parameter.
# Input:       None
# Output:      None
#****************************************************************************
usage()
{
  printf '%s\n' "Enter archive filename as a parameter: $0 icu-archive.tar"
}
#****************************************************************************
# First make sure we have at least one argument and that it names a file
# we can read.  Either failure prints the usage text and exits with a
# non-zero status so that callers can detect the error.
#****************************************************************************
# check for no arguments
if [ $# -eq 0 ]; then
  usage
  exit 1
fi
# tar file is argument 1
tar_file=$1
# check that the file is valid; the operand is quoted so that an empty
# argument or one containing whitespace is still tested as a single word
if [ ! -r "$tar_file" ]; then
  echo "$tar_file does not exist or cannot be read."
  usage
  exit 1
fi
# treat all data files as ebcdic
ebcdic_data=$data_files
#****************************************************************************
# Extract files. We do this in two passes. One pass for 819 files and a
# second pass for 37 files
#****************************************************************************
echo ""
echo "Extracting from $tar_file ..."
echo ""
# extract everything as iso-8859-1 except these directories
# ("$tar_file" is quoted so a path with whitespace stays one operand;
# $ebcdic_data is deliberately unquoted so it splits into one pattern per
# directory)
pax -C 819 -rc${VERBOSE_UNPACK}f "$tar_file" $ebcdic_data
# extract files while converting them to EBCDIC
echo ""
echo "Extracting files which must be in ibm-37 ..."
echo ""
pax -C 37 -r${VERBOSE_UNPACK}f "$tar_file" $ebcdic_data
#****************************************************************************
# For files we have restored as CCSID 37, check the BOM to see if they
# should be processed as 819.  Files that match are added to the
# binary_files list, and the list is re-extracted as 819 in batches.
# If icu/as_is/bomlist.txt exists its contents are used as the list
# instead of scanning.
#****************************************************************************

# Print the first three bytes of the first line of file $1 as lowercase
# hex values separated by single spaces (for example "57 8b ab").
# The first field of the od output line is skipped by the cut.
_bom8_of()
{
  head -n 1 "$1" |\
    od -t x1 |\
    head -n 1 |\
    tr -s ' ' |\
    cut -f2-4 -d ' ' |\
    tr 'A-Z' 'a-z'
}

echo ""
echo "Determining binary files by BOM ..."
echo ""
bin_count=0
# start from an empty list so that a binary_files value inherited from the
# environment can never reach the rm calls below
binary_files=""
# Process BOMs
if [ -f icu/as_is/bomlist.txt ]; then
  echo "Using icu/as_is/bomlist.txt"
  bom_list=$(cat icu/as_is/bomlist.txt)
  # pax selects every archive member when it is given no patterns, so only
  # call it when the list actually names something
  if [ $(echo $bom_list | wc -w) -gt 0 ]; then
    pax -C 819 -rvf "$tar_file" $bom_list
  fi
else
  for file in $(find ./icu \( -name \*.txt -print \)); do
    bom8=$(_bom8_of "$file")
    #Find a converted UTF-8 BOM
    if [ "$bom8" = "057 08b 0ab" ] || [ "$bom8" = "57 8b ab" ]; then
      # drop the leading "./" that find put in front of the path
      file="$(echo $file | cut -d / -f2-)"
      if [ $(echo $binary_files | wc -w) -lt 200 ]; then
        bin_count=$(expr $bin_count + 1)
        binary_files="$binary_files $file"
      else
        # the pending list is full: restore it now, then start a new list
        # with the current file
        echo "Restoring binary files by BOM ($bin_count)..."
        rm $binary_files
        pax -C 819 -rvf "$tar_file" $binary_files
        echo "Determining binary files by BOM ($bin_count)..."
        binary_files="$file"
        bin_count=$(expr $bin_count + 1)
      fi
    fi
  done
  # now see if a re-extract of binary files is necessary
  if [ $(echo $binary_files | wc -w) -gt 0 ]; then
    echo "Restoring binary files ($bin_count) ..."
    rm $binary_files
    pax -C 819 -rvf "$tar_file" $binary_files
  fi
fi
echo "# Processing special paths."
# Process special paths
# Turn every entry of binary_suffixes into an "-o -name" term for find.
more_bin_opts=$(echo $binary_suffixes | sed -e 's%[a-zA-Z0-9]*%-o -name \*.&%g')
# echo "Looking for additional files: find ... $more_bin_opts"
# '*.zzz' is only the leading term, so each generated term can start with -o.
more_bin_files=$(find icu -type f \( -name '*.zzz' $more_bin_opts \) -print)
echo "Restoring binary files by special paths ($bin_count) ..."
# Skip the restore when nothing matched: rm would fail without operands and
# pax selects every archive member when it is given no patterns.
if [ $(echo $more_bin_files | wc -w) -gt 0 ]; then
  rm $more_bin_files
  pax -C 819 -rvf "$tar_file" $more_bin_files
fi
#****************************************************************************
# Generate the qsh compatible configure script: filter the shipped
# configure through convertConfigure.sed into a temporary file, then put
# the result in place of the original and make it executable.
#****************************************************************************
configure_dir=icu/source
configure_sed=icu/as_is/os400/convertConfigure.sed
echo ""
echo "Generating qsh compatible configure ..."
echo ""
sed -f "$configure_sed" "$configure_dir/configure" > "$configure_dir/configureTemp"
# NOTE(review): "del" is kept exactly as the script had it - confirm it is
# available on the target system.
del -f "$configure_dir/configure"
mv "$configure_dir/configureTemp" "$configure_dir/configure"
chmod 755 "$configure_dir/configure"
echo ""
echo "$0 has completed extracting ICU from $tar_file - $bin_count binary files extracted."