-
Notifications
You must be signed in to change notification settings - Fork 5
/
mypos2json.pl
42 lines (38 loc) · 1.14 KB
/
mypos2json.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
#!/usr/bin/perl
use warnings;
use strict;
use utf8;
# Converts myPOS word/POS-tagged text into JSON: one array per non-empty
# input line, holding one ["word","POS"] pair per token.
# Written by Ye Kyaw Thu, LST, NECTEC, Thailand
# How to run:
# perl ./mypos2json.pl <input-file>
# e.g. perl ./mypos2json.pl ./mypos-dver.1.0.txt.clean > ./mypos.json
# ---------------------------------------------------------------------------
# Main script.
#
# Reads the file named by the first command-line argument and prints a JSON
# document to STDOUT: an outer array containing one inner array per input
# line that holds at least one token, each inner array containing one
# ["word","POS"] pair per whitespace-separated token.
#
# Dies with a usage message when no argument is given, and with the system
# error when the input file cannot be opened.
# ---------------------------------------------------------------------------
binmode STDIN,  ":utf8";
binmode STDOUT, ":utf8";
binmode STDERR, ":utf8";

# Fail early with a clear message instead of an "uninitialized value"
# warning followed by a confusing open error.
die "Usage: perl $0 <input-file>\n" unless @ARGV;

open(my $inputFILE, "<:encoding(utf8)", $ARGV[0])
    or die "Couldn't open input file $ARGV[0]!, $!\n";

print("[\n");

# Number of sentence arrays written so far.  The separator is emitted
# *before* every record except the first, so trailing blank lines in the
# input can never leave a dangling comma in front of the closing bracket.
my $sentence_count = 0;

while (my $line = <$inputFILE>) {
    chomp($line);
    $line =~ s/\|/ /g;    # comment out this line if your data does not use the pipe "|" separator

    # Split on runs of whitespace and discard empty fields, so repeated
    # spaces, leading blanks or a stray carriage return never produce an
    # empty token.
    my @tokens = grep { length } split(/\s+/, $line);

    # Blank and whitespace-only lines carry no sentence: skip them.
    next unless @tokens;

    my @pairs;
    foreach my $one_token (@tokens) {
        my ($word, $POS) = split_token($one_token);
        push @pairs, ' ["' . json_escape($word) . '","' . json_escape($POS) . '"]';
    }

    print(",\n") if $sentence_count++;
    print(" [\n", join(",\n", @pairs), "\n ]");
}

# Terminate the last record's line (nothing to terminate for empty input).
print("\n") if $sentence_count;
print("]\n");
close($inputFILE);

# Split one "word/POS" token into its two parts.
#
# The tag is taken to be everything after the LAST slash, so a word that
# itself contains "/" (for example "1/2/num") keeps its full text.
# A token without any slash is returned as the word with an empty tag.
#
# Returns: a two-element list (word, POS); both elements are always defined.
sub split_token {
    my ($token) = @_;
    my $slash_at = rindex($token, '/');
    return ($token, '') if $slash_at < 0;
    return (substr($token, 0, $slash_at), substr($token, $slash_at + 1));
}

# Escape a string for use inside a double-quoted JSON string literal.
#
# Backslash and double quote are prefixed with a backslash; remaining
# control characters (U+0000..U+001F) are written as \uXXXX escapes.
#
# Returns: the escaped copy; the argument is not modified.
sub json_escape {
    my ($text) = @_;
    $text =~ s/([\\"])/\\$1/g;
    $text =~ s/([\x00-\x1f])/sprintf('\\u%04x', ord($1))/ge;
    return $text;
}