# Struct Statistics Analysis
This extracts and answers the following questions:
- 1-2. field access inside the loop
- 3. max number of fields in a struct
- 4. has nested structs
- 5. has any storage var of type that is an array of structs
- 6. has any storage var of a mapping type whose values are structs
- 7. number of structs
- 8. has structs that have at least one member of array type

In [1]:
import json
import os
import statistics

In [2]:
def remove_comments(raw_lines):
    """Return the stripped lines of ``raw_lines`` that are neither blank nor comments.

    Each line is whitespace-stripped first. Blank lines and lines starting
    with ``//`` are always dropped. A line starting with ``/*`` opens a
    block comment and a line ending with ``*/`` closes it; the opening
    line, the closing line and everything in between are dropped.
    Comment markers that appear elsewhere in a line are not recognised.
    """
    kept = []
    in_block = False
    for raw in raw_lines:
        text = raw.strip()
        # blank lines and single-line comments never count
        if not text or text.startswith("//"):
            continue
        in_block = in_block or text.startswith("/*")
        if not in_block:
            kept.append(text)
        elif text.endswith("*/"):
            # the closing line itself is still part of the comment
            in_block = False
    return kept

def count_loops(code_lines):
    """Count the lines that start with a ``for`` or ``while`` loop header.

    A line is counted once when it begins with ``for (``, ``while (``,
    ``for(`` or ``while(``. Lines are expected to be already stripped.
    """
    loop_prefixes = ("for (", "while (", "for(", "while(")
    return sum(1 for line in code_lines if line.startswith(loop_prefixes))

In [3]:
json_files = os.listdir("./contract-benchmarks-master/loops_asts/")

# Parallel pools: index k of each list describes the same contract file.
name_pool = []  # AST file names
ast_pool = []   # parsed ASTs
loc_pool = []   # source lines left after comment/blank removal
nol_pool = []   # loop headers counted in the source text

for i, p in enumerate(json_files):
    print("\r# loading jsons {}/{}".format(i, len(json_files)), end="")
    with open("./contract-benchmarks-master/loops_asts/{}".format(p), "r", encoding="iso-8859-1") as f:
        raw_lines = f.readlines()
    if len(raw_lines) <= 1:
        # nothing to parse in an (almost) empty file; it gets no pool entry
        continue
    # the first four lines are a header, the JSON document follows them
    raw_ast = json.loads("".join(raw_lines[4:]))
    ast_pool.append(raw_ast)
    name_pool.append(p)

    # line and loop counts come from the matching .sol source, not the AST
    with open("./contract-benchmarks-master/verified_contracts/{}".format(p.replace(".json",".sol")), "r", encoding="iso-8859-1") as f:
        dlines = f.readlines()
    clines = remove_comments(dlines)

    loc_pool.append(len(clines))
    nol_pool.append(count_loops(clines))

# loading jsons 4493/4494

In [5]:
# NOTE: matches are accumulated in the module-level list `tmp_loop_pool`.
# Rebind it to a fresh list before every top-level call.
tmp_loop_pool = []
def get_loop_node(cnode):
    """Append every ``ForStatement``/``WhileStatement`` dict under ``cnode`` to ``tmp_loop_pool``.

    ``cnode`` may be a dict, a list, or any other value (ignored). The walk
    is depth-first in pre-order and keeps descending into a matched loop,
    so nested loops are appended as separate entries after their parent.
    """
    global tmp_loop_pool
    if isinstance(cnode, list):
        for child in cnode:
            get_loop_node(child)
        return
    if not isinstance(cnode, dict):
        # None and scalar leaves carry no nodes
        return
    if cnode.get("nodeType") in ("ForStatement", "WhileStatement"):
        tmp_loop_pool.append(cnode)
    for child in cnode.values():
        get_loop_node(child)
    
# Collect the loop nodes of every loaded AST: loop_pool[k] belongs to ast_pool[k].
loop_pool = []
for i, tmp_ast in enumerate(ast_pool):
    print("\r# processing {}/{}".format(i, len(ast_pool)),end="")
    # get_loop_node appends into this global, so start from a fresh list per file
    tmp_loop_pool = []
    get_loop_node(tmp_ast)
    loop_pool.append(tmp_loop_pool)

# processing 4466/4467

In [6]:
# Step one: list every user-defined struct of a file.
# NOTE: names are accumulated in the module-level list `tmp_struct_pool`.
# Rebind it to a fresh list before every top-level call.
tmp_struct_pool = []
def get_struct_by_name(cnode):
    """Append the ``canonicalName`` of every ``StructDefinition`` under ``cnode`` to ``tmp_struct_pool``.

    ``cnode`` may be a dict, a list, or any other value (ignored). Names are
    collected in depth-first pre-order; a struct definition without a
    ``canonicalName`` key raises ``KeyError``.
    """
    global tmp_struct_pool
    if isinstance(cnode, list):
        for child in cnode:
            get_struct_by_name(child)
        return
    if not isinstance(cnode, dict):
        return
    if cnode.get("nodeType") == "StructDefinition":
        tmp_struct_pool.append(cnode["canonicalName"])
    for child in cnode.values():
        get_struct_by_name(child)
    
# Canonical names of the structs defined in each file: struct_pool[k] belongs to ast_pool[k].
struct_pool = []
for i, tmp_ast in enumerate(ast_pool):
    print("\r# processing {}/{}".format(i, len(ast_pool)),end="")
    # get_struct_by_name appends into this global, so start from a fresh list per file
    tmp_struct_pool = []
    get_struct_by_name(tmp_ast)
    struct_pool.append(tmp_struct_pool)

# processing 4466/4467

In [11]:
# for y in struct_pool:
#     if len(y)>0:
#         print(y)

In [7]:
# 1-2. field access inside the loop
# NOTE: the verdict is reported through the module-level flag
# `flag_field_access`; reset it to False before every top-level call.
flag_field_access = False
def detect_field_access(cnode, slist):
    """Set ``flag_field_access`` when a struct member access occurs under ``cnode``.

    A ``MemberAccess`` dict is a hit when the ``typeString`` of its
    ``expression`` contains, as a substring, one of the names in ``slist``.
    On a hit only the current call returns; nodes elsewhere in the tree are
    still visited by the enclosing calls.

    Raises:
        NotImplementedError: a ``MemberAccess`` dict has no ``expression`` key.
    """
    global flag_field_access
    if isinstance(cnode, list):
        for child in cnode:
            detect_field_access(child, slist)
        return
    if not isinstance(cnode, dict):
        return
    if cnode.get("nodeType") == "MemberAccess":
        if "expression" not in cnode:
            raise NotImplementedError("Let me know!")
        # the type string is looked up lazily, i.e. only when slist is non-empty
        if any(
            struct_name in cnode["expression"]["typeDescriptions"]["typeString"]
            for struct_name in slist
        ):
            flag_field_access = True
            return
    for child in cnode.values():
        detect_field_access(child, slist)
    
# Q1-2: per file, does any loop access a member of one of the file's own structs?
field_access_pool = []
for i, tmp_ast in enumerate(ast_pool):
    print("\r# processing {}/{}".format(i, len(ast_pool)),end="")
    tmp_llist, tmp_slist = loop_pool[i], struct_pool[i]
    # detect_field_access only ever raises this global flag, so clear it per file
    flag_field_access = False
    # only the loop subtrees are searched, not the whole AST
    for q in tmp_llist:
        detect_field_access(q, tmp_slist)
    field_access_pool.append(flag_field_access)

# processing 4466/4467

In [27]:
# for i in range(len(ast_pool)):
#     if field_access_pool[i]:
#         print("{}, {}".format(i, name_pool[i]))

3, SignalToken.json
6, CryptoflipCar.json
7, AgileCycle.json
12, BetOnWorldCupFinal.json
14, EnsOwnerProxy.json
17, UNITStagesManager.json
18, TrueDeckToken.json
19, Danku_demo.json
20, A21Builder.json
28, bet_various.json
32, AdvertisementFinance.json
44, HorseyToken.json
48, ActionPresell.json
49, ALCCrowdsale.json
54, TrumpBingo.json
70, YopoInvest.json
71, LottoPI.json
78, CryptoPhoenixesCivilWar.json
81, NervesSmartStaking.json
91, AngelTokensHolder.json
92, TwoXJackpot.json
95, QuantstampAuditView.json
97, fortunes.json
99, McwCustomerRegistry.json
100, AdvertisementStorage.json
109, TwoCoinsOneMoonGame.json
110, RankingBallGoldCustomToken.json
111, Partnership.json
115, Billboard.json
119, EthPlot.json
121, UNITDummyPaymentGateway.json
134, Daocoin.json
139, DigitalGame.json
151, MultiSigERC20Token.json
153, DRCTLibrary.json
155, CrowdsaleTokenController.json
156, CJC.json
157, ERC20CappedLong.json
166, TipToken.json
175, ChemistryCore.json
181, ABAToken.json
190, AlgoryPricingS

In [8]:
# 3. max number of fields in a struct
# NOTE: the running maximum lives in the module-level `tmp_max_nfields`;
# reset it to 0 before every top-level call.
tmp_max_nfields = 0
def get_max_nfields(cnode):
    """Raise ``tmp_max_nfields`` to the largest ``members`` count of any ``StructDefinition`` under ``cnode``.

    ``cnode`` may be a dict, a list, or any other value (ignored). The
    global is only ever increased, never lowered, by this function.
    """
    global tmp_max_nfields
    if isinstance(cnode, list):
        for child in cnode:
            get_max_nfields(child)
        return
    if not isinstance(cnode, dict):
        return
    if cnode.get("nodeType") == "StructDefinition":
        tmp_max_nfields = max(tmp_max_nfields, len(cnode["members"]))
    for child in cnode.values():
        get_max_nfields(child)
    
# Q3: per file, the member count of its largest struct (0 when it defines none).
max_nfields_pool = []
for i, tmp_ast in enumerate(ast_pool):
    print("\r# processing {}/{}".format(i, len(ast_pool)),end="")
    # get_max_nfields only ever increases this global, so restart from 0 per file
    tmp_max_nfields = 0
    get_max_nfields(tmp_ast)
    max_nfields_pool.append(tmp_max_nfields)

# processing 4466/4467

In [19]:
# 4. has nested structs
# NOTE: the verdict is reported through the module-level flag
# `flag_nested_struct`; reset it to False before every top-level call.
flag_nested_struct = False
def detect_nested_struct(cnode, slist):
    """Set ``flag_nested_struct`` when a struct under ``cnode`` has a struct-typed member.

    For every ``StructDefinition`` dict, each entry of ``members`` is a hit
    when its ``typeString`` contains, as a substring, one of the names in
    ``slist``. On a hit only the current call returns; the rest of the
    tree is still visited by the enclosing calls.
    """
    global flag_nested_struct
    if isinstance(cnode, list):
        for child in cnode:
            detect_nested_struct(child, slist)
        return
    if not isinstance(cnode, dict):
        return
    if cnode.get("nodeType") == "StructDefinition":
        # members outer, names inner: the type string is read only when slist is non-empty
        if any(
            struct_name in member["typeDescriptions"]["typeString"]
            for member in cnode["members"]
            for struct_name in slist
        ):
            flag_nested_struct = True
            return
    for child in cnode.values():
        detect_nested_struct(child, slist)
    
# Q4: per file, does any struct contain a member typed as one of the file's own structs?
nested_struct_pool = []
for i, tmp_ast in enumerate(ast_pool):
    print("\r# processing {}/{}".format(i, len(ast_pool)),end="")
    tmp_llist, tmp_slist = loop_pool[i], struct_pool[i]
    # detect_nested_struct only ever raises this global flag, so clear it per file
    flag_nested_struct = False
    detect_nested_struct(tmp_ast, tmp_slist)
    nested_struct_pool.append(flag_nested_struct)

# processing 4466/4467

In [13]:
# name_pool.index("EtherLab.json")

773

In [14]:
# struct_pool[773]

['EtherLab.Deposit', 'EtherLab.User']

In [20]:
# for i in range(len(ast_pool)):
#     if nested_struct_pool[i]:
#         print("{}, {}".format(i, name_pool[i]))

14, EnsOwnerProxy.json
17, UNITStagesManager.json
20, A21Builder.json
54, TrumpBingo.json
81, NervesSmartStaking.json
87, DgxDemurrageReporter.json
95, QuantstampAuditView.json
153, DRCTLibrary.json
157, ERC20CappedLong.json
229, WeSingCoin223Token_11.json
285, Main0018_preTokenSigners.json
297, ERC20CappedShort.json
321, PromoCodes.json
332, MilFold.json
336, ServiceController.json
354, Version.json
364, EasyInvestUP.json
382, PULSToken.json
385, BlockchainBattleground.json
433, BullsAndCows.json
438, ClaimHolderPresigned.json
444, Subscription.json
445, Goo.json
446, ERC20PositionWithdrawerV2.json
449, CryptoCupVirtualMatch.json
475, EtherTokenExchange.json
484, Parameterizer.json
503, BookingPoC.json
506, RequestOMG.json
510, SlotLottery.json
523, XG4KCrowdFunding.json
585, EthBox.json
608, OwnedUpgradeabilityProxy.json
651, PLCRVoting.json
663, ADXExchange.json
664, Famo.json
681, WorkerPoolHub.json
688, DogRace.json
697, EvenDistroCrowdsaleLib.json
701, EXLINKCOIN.json
712, Crypto

In [22]:
# 5. has any storage var whose type is an array of structs
# NOTE: the verdict is reported through the module-level flag
# `flag_array_struct`; reset it to False before every top-level call.
flag_array_struct = False
def detect_array_struct(cnode, slist):
    """Set ``flag_array_struct`` when an array-of-struct type occurs under ``cnode``.

    Walks the dicts and lists of a JSON AST. A dict that has a ``nodeType``
    is a hit when its ``typeDescriptions["typeString"]`` contains ``"[]"``
    and, as a substring, one of the names in ``slist``. Subtrees rooted at
    a ``ParameterList`` node are skipped so that declarations in parameter
    lists are not counted.

    NOTE(review): every typed node is inspected, not only state-variable
    declarations, so an expression that refers to such a value also
    counts. Confirm this matches the intended "storage var" question.

    Args:
        cnode: AST fragment (dict, list, scalar or None).
        slist: canonical names of the structs defined in the file.

    Returns:
        None. The outcome is stored in the global ``flag_array_struct``.
    """
    global flag_array_struct
    if flag_array_struct:
        # already decided; the flag is never lowered here, so stop walking
        return
    if isinstance(cnode, list):
        for child in cnode:
            detect_array_struct(child, slist)
        return
    if not isinstance(cnode, dict):
        return
    if "nodeType" in cnode:
        # The old test was `"nodeType" in ["ParameterList"]`, which compared a
        # literal and never matched, so parameter lists were never skipped.
        if cnode["nodeType"] == "ParameterList":
            return
        type_desc = cnode.get("typeDescriptions")
        type_string = type_desc.get("typeString") if isinstance(type_desc, dict) else None
        # a null typeString means the node has no resolved type (e.g. a literal)
        if isinstance(type_string, str) and "[]" in type_string:
            if any(struct_name in type_string for struct_name in slist):
                flag_array_struct = True
                return
    for child in cnode.values():
        detect_array_struct(child, slist)
    
# Q5: per file, does any typed node have an array type built from one of the file's own structs?
array_struct_pool = []
for i, tmp_ast in enumerate(ast_pool):
    print("\r# processing {}/{}".format(i, len(ast_pool)),end="")
    tmp_llist, tmp_slist = loop_pool[i], struct_pool[i]
    # detect_array_struct only ever raises this global flag, so clear it per file
    flag_array_struct = False
    detect_array_struct(tmp_ast, tmp_slist)
    array_struct_pool.append(flag_array_struct)

# processing 4466/4467

In [19]:
# for i in range(len(ast_pool)):
#     if array_struct_pool[i]:
#         print("{}, {}".format(i, name_pool[i]))

In [23]:
# 6. has any storage var of a mapping type whose values are structs
# NOTE: the verdict is reported through the module-level flag
# `flag_map_struct`; reset it to False before every top-level call.
flag_map_struct = False
def detect_map_struct(cnode, slist):
    """Set ``flag_map_struct`` when a mapping type involving a struct occurs under ``cnode``.

    Walks the dicts and lists of a JSON AST. A dict that has a ``nodeType``
    is a hit when its ``typeDescriptions["typeString"]`` contains ``"map"``
    and, as a substring, one of the names in ``slist``. Subtrees rooted at
    a ``ParameterList`` node are skipped so that declarations in parameter
    lists are not counted.

    NOTE(review): both checks are plain substring tests, so a type string
    that merely contains "map" (for instance inside an identifier) also
    qualifies. Every typed node is inspected, not only state-variable
    declarations. Confirm this matches the intended question.

    Args:
        cnode: AST fragment (dict, list, scalar or None).
        slist: canonical names of the structs defined in the file.

    Returns:
        None. The outcome is stored in the global ``flag_map_struct``.
    """
    global flag_map_struct
    if flag_map_struct:
        # already decided; the flag is never lowered here, so stop walking
        return
    if isinstance(cnode, list):
        for child in cnode:
            detect_map_struct(child, slist)
        return
    if not isinstance(cnode, dict):
        return
    if "nodeType" in cnode:
        # The old test was `"nodeType" in ["ParameterList"]`, which compared a
        # literal and never matched, so parameter lists were never skipped.
        if cnode["nodeType"] == "ParameterList":
            return
        type_desc = cnode.get("typeDescriptions")
        type_string = type_desc.get("typeString") if isinstance(type_desc, dict) else None
        # a null typeString means the node has no resolved type (e.g. a literal)
        if isinstance(type_string, str) and "map" in type_string:
            if any(struct_name in type_string for struct_name in slist):
                flag_map_struct = True
                return
    for child in cnode.values():
        detect_map_struct(child, slist)
    
# Q6: per file, does any typed node have a mapping type that mentions one of the file's own structs?
map_struct_pool = []
for i, tmp_ast in enumerate(ast_pool):
    print("\r# processing {}/{}".format(i, len(ast_pool)),end="")
    tmp_llist, tmp_slist = loop_pool[i], struct_pool[i]
    # detect_map_struct only ever raises this global flag, so clear it per file
    flag_map_struct = False
    detect_map_struct(tmp_ast, tmp_slist)
    map_struct_pool.append(flag_map_struct)

# processing 4466/4467

In [26]:
# for i in range(len(ast_pool)):
#     if map_struct_pool[i]:
#         print("{}, {}".format(i, name_pool[i]))

In [27]:
# 7. number of structs
# Q7 is simply the length of each file's struct-name list.
nstruct_pool = []
for i in range(len(ast_pool)):
    print("\r# processing {}/{}".format(i, len(ast_pool)),end="")
    tmp_slist = struct_pool[i]
    nstruct_pool += [len(tmp_slist)]

# processing 4466/4467

In [28]:
# 8. has structs that have at least one member of array type
# NOTE: the verdict is reported through the module-level flag
# `flag_array_member`; reset it to False before every top-level call.
flag_array_member = False
def detect_array_member(cnode):
    """Set ``flag_array_member`` when a struct under ``cnode`` has an array-typed member.

    For every ``StructDefinition`` dict, a member is a hit when its
    ``typeString`` contains the substring ``"[]"``; a type string with a
    length between the brackets therefore does not match. On a hit only
    the current call returns; the rest of the tree is still visited by the
    enclosing calls.
    """
    global flag_array_member
    if isinstance(cnode, list):
        for child in cnode:
            detect_array_member(child)
        return
    if not isinstance(cnode, dict):
        return
    if cnode.get("nodeType") == "StructDefinition":
        if any("[]" in member["typeDescriptions"]["typeString"] for member in cnode["members"]):
            flag_array_member = True
            return
    for child in cnode.values():
        detect_array_member(child)
    
# Q8: per file, does any struct have a member whose type string contains "[]"?
array_member_pool = []
for i, tmp_ast in enumerate(ast_pool):
    print("\r# processing {}/{}".format(i, len(ast_pool)),end="")
    # detect_array_member only ever raises this global flag, so clear it per file
    flag_array_member = False
    detect_array_member(tmp_ast)
    array_member_pool.append(flag_array_member)

# processing 4466/4467

In [23]:
# for i in range(len(ast_pool)):
#     if array_member_pool[i]:
#         print("{}, {}".format(i, name_pool[i]))

6, CryptoflipCar.json
19, Danku_demo.json
32, AdvertisementFinance.json
44, HorseyToken.json
63, SecurityTokenRegistrar.json
81, NervesSmartStaking.json
100, AdvertisementStorage.json
139, DigitalGame.json
153, DRCTLibrary.json
174, Empires.json
214, GamerTokenSale.json
241, Win20ETH.json
285, Main0018_preTokenSigners.json
305, ICUCrowdsale.json
321, PromoCodes.json
327, ICUAgent.json
332, MilFold.json
382, PULSToken.json
385, BlockchainBattleground.json
433, BullsAndCows.json
438, ClaimHolderPresigned.json
444, Subscription.json
445, Goo.json
465, zombieInvasion.json
510, SlotLottery.json
581, IkuraAssociation.json
585, EthBox.json
619, ELOT.json
663, ADXExchange.json
665, TotlePrimary.json
681, WorkerPoolHub.json
684, AssetManager.json
688, DogRace.json
697, EvenDistroCrowdsaleLib.json
701, EXLINKCOIN.json
718, BetGame.json
720, NovaGame.json
770, Treaties.json
773, EtherLab.json
784, OutLuck100.json
815, KittyPillar.json
827, CryptoTreasure.json
835, Variation.json
848, Riveth.json


In [29]:
import csv
# Dump one CSV row per loaded file: name, size figures, then the answers to Q1-2 .. Q8.
with open('./struct_stats.csv', 'w', newline='') as csvfile:
    fieldnames = ['file', 'nlines', 'nloops', 'Q1-2', 'Q3', 'Q4', 'Q5', 'Q6', 'Q7', 'Q8']
    spamwriter = csv.DictWriter(csvfile, fieldnames=fieldnames, delimiter=',')
    spamwriter.writeheader()
    for i in range(len(ast_pool)):
        # values are listed in the same order as `fieldnames`
        row_values = (
            name_pool[i],
            loc_pool[i],
            nol_pool[i],
            field_access_pool[i],   # 1-2
            max_nfields_pool[i],    # 3
            nested_struct_pool[i],  # 4
            array_struct_pool[i],   # 5
            map_struct_pool[i],     # 6
            nstruct_pool[i],        # 7
            array_member_pool[i],   # 8
        )
        spamwriter.writerow(dict(zip(fieldnames, row_values)))