In [20]:
import json

In [200]:
def rejectCancelled(data):
    return [t for t in data['tasks'] if not t['status']=="cancelled"]

def typesToStringUpTo(i, tcs):
    return ".".join(tcs[0:i+1])

appealIdsLimit=5
appealIdsDict={}
tcsCountsDict={}
tcsSetDict={}
nextlinksDict={}
backlinksDict={}

def flatten(data):
    taskscreationseq=list(map(lambda task: task['type']+"_"+task['assigned_to_type'], rejectCancelled(data)))
    for i in range(len(taskscreationseq)):
        task=taskscreationseq[i]
        if i>0:
            backlinksDict[task]=backlinksDict.get(task, {})
            backlinksDict[task][taskscreationseq[i-1]]=backlinksDict[task].get(taskscreationseq[i-1], 0)+1
        if i+1<len(taskscreationseq):
            nextlinksDict[task]=nextlinksDict.get(task, {})
            nextlinksDict[task][taskscreationseq[i+1]]=nextlinksDict[task].get(taskscreationseq[i+1], 0)+1
        
        typePrefix=typesToStringUpTo(i, taskscreationseq)
        
        tcsSetDict[task]=tcsSetDict.get(task, set())
        tcsSetDict[task].add(typePrefix)
        
        tcsCountsDict[typePrefix]=tcsCountsDict.get(typePrefix,0)+1
        
        appealIdsDict[typePrefix]=appealIdsDict.get(typePrefix, [])
        if(len(appealIdsDict[typePrefix])<appealIdsLimit):
            appealIdsDict[typePrefix].append(data['appeal_id'])
            
        #print(i, tcsCountsDict[typePrefix], appealIdsDict[typePrefix], tcsSetDict[tcs], typePrefix)
    #del data['tasks']
    return { 'docket': data['docket_type'], 'tcs': taskscreationseq}

In [218]:
appealIdsDict.clear()
tcsCountsDict.clear()
tcsSetDict.clear()
nextlinksDict.clear()
backlinksDict.clear()

with open('prepped2.json', 'w') as pf:
    with open('input2.json') as jf:
        for count, line in enumerate(jf):
            data = json.loads(line)
            #removeExtraFields(data)
            flatdata=flatten(data)
            #print(count, data['appeal_id'], flatdata)
            pf.write(json.dumps(flatdata)+"\n")



In [286]:
def create_tasklist():
    with open('tasklist.md', 'w') as tlf:
        tlf.write(f'# Task Listing\n\n')
        listing={}
        for taskname,tcsSet in tcsSetDict.items():
            count=sum([tcsCountsDict[tcs] for tcs in tcsSet])
            listing[taskname]=count
        for taskname,count in sorted(listing.items(), key=lambda kv: kv[1], reverse=True):
            tlf.write(f'   * [{taskname}](docs/{taskname}.md) ({count} occurrences)\n')

create_tasklist()

In [None]:
print(appealIdsDict)            
print(tcsCountsDict)
print(tcsSetDict)

In [297]:
import os
os.path.isdir('docs') or os.mkdir('docs')
os.path.isdir('docs/uml') or os.mkdir('docs/uml')
os.path.isdir('docs/dot') or os.mkdir('docs/dot')

def abbrev(tcs):
    return ''.join(filter(lambda x: x.isupper() or x=='.', str(tcs)))

def gen_md_files():
    for taskname,tcsSet in tcsSetDict.items():
        #print(task, tcsSet)
        with open('docs/'+taskname+'.md', 'w') as mdf:
            #mdf.write('# '+taskname.split("_")[0]+" "+taskname.split("_")[1]+'\n\n')
            mdf.write(f'# {taskname}\n\n')
            mdf.write(f"<details><summary>Links for {taskname}</summary>\n\n```\n")
            graphviz=gen_graphviz(nextlinksDict, backlinksDict, taskname)
            mdf.write(graphviz)
            mdf.write('```\n</details>\n\n')
            mdf.write(f'![{taskname}](dot/{taskname}.dot.png)\n\n')
            with open(f'docs/dot/{taskname}.dot', 'w') as gvf:
                gvf.write(graphviz)

            mdf.write('## Nextlinks\n\n')
            if taskname in nextlinksDict:
                for link,count in sorted(nextlinksDict[taskname].items(), key=lambda kv: kv[1], reverse=True):
                    mdf.write(f"   * {count} [{link}]({link}.md)\n")
            mdf.write("\n")
            mdf.write('## Backlinks\n\n')
            if taskname in backlinksDict:
                for link,count in sorted(backlinksDict[taskname].items(), key=lambda kv: kv[1], reverse=True):
                    mdf.write(f"   * {count} [{link}]({link}.md)\n")
            mdf.write("\n")

            for tcs in sorted(tcsSet, key=lambda k: tcsCountsDict[k], reverse=True):
                mdf.write(gen_tcs_section(tcs, tcsCountsDict[tcs], appealIdsDict[tcs]))

def gen_tcs_section(tcs, count, example_appeal_ids):
    tcsName=abbrev(tcs)
    tstr=f"## {tcsName}\n\n"
    tstr+=f"{count} occurrences (example appeals: {example_appeal_ids})\n\n"
    appealId=appealIdsDict[tcs][0]
    tstr+=f"<details><summary>PlantUML for {appealId}</summary>\n\n```\n"
    appeal=find_appeal("input2.json", appealId)
    plantuml=gen_plantuml(appeal)
    tstr+=plantuml
    tstr+='```\n</details>\n\n'
    
    tstr+=f'![{tcsName}-{appealId}](uml/{tcsName}-{appealId}.png)\n\n'
    # create associated plantUML file to generate png
    with open(f'docs/uml/{tcsName}-{appealId}.uml', 'w') as umlf:
        umlf.write(plantuml)
        
    return tstr
            
gen_md_files()

In [295]:
def gen_graphviz(nextlinksDict, backlinksDict, *tasknames):
    edges=set()
    for taskname in tasknames:
        if taskname in nextlinksDict:
            for link,count in sorted(nextlinksDict[taskname].items(), key=lambda kv: kv[1], reverse=True):
                edges.add(f'"{taskname}" -> "{link}" [label={count}]')
        if taskname in backlinksDict:
            for link,count in sorted(backlinksDict[taskname].items(), key=lambda kv: kv[1], reverse=True):
                edges.add(f'"{link}" -> "{taskname}" [label={count}]')
    gstr='digraph G {\nrankdir="LR";\n'
    gstr+="\n".join(edges)
    gstr+="\n}\n"
    return gstr

def save_graphviz(nextlinksDict, backlinksDict, *tasknames):
    tcsName=abbrev(".".join(tasknames))
    with open(f'docs/dot/{tcsName}.dot', 'w') as gvf:
        gvf.write(gen_graphviz(nextlinksDict, backlinksDict, *tasknames))

print(gen_graphviz(nextlinksDict, backlinksDict, "RootTask_Organization", "DistributionTask_Organization", "EvidenceSubmissionWindowTask_Organization"))
save_graphviz(nextlinksDict, backlinksDict, "RootTask_Organization", "DistributionTask_Organization", "EvidenceSubmissionWindowTask_Organization")

digraph G {
rankdir="LR";
"DistributionTask_Organization" -> "TrackVeteranTask_Organization" [label=1]
"EvidenceSubmissionWindowTask_Organization" -> "TrackVeteranTask_Organization" [label=2]
"EvidenceSubmissionWindowTask_Organization" -> "SpecialCaseMovementTask_User" [label=1]
"DistributionTask_Organization" -> "JudgeAssignTask_User" [label=16]
"EvidenceSubmissionWindowTask_Organization" -> "JudgeAssignTask_User" [label=2]
"DistributionTask_Organization" -> "ScheduleHearingTask_Organization" [label=128]
"RootTask_Organization" -> "DistributionTask_Organization" [label=102]
"DistributionTask_Organization" -> "EvidenceOrArgumentMailTask_Organization" [label=1]
"EvidenceSubmissionWindowTask_Organization" -> "InformalHearingPresentationTask_Organization" [label=9]
"DistributionTask_Organization" -> "SpecialCaseMovementTask_User" [label=1]
"DistributionTask_Organization" -> "EvidenceSubmissionWindowTask_Organization" [label=103]
"RootTask_Organization" -> "TrackVeteranTask_Organization" [

In [243]:
tcsSetDict['DistributionTask_Organization']

{'DistributionTask_Organization',
 'RootTask_Organization.DistributionTask_Organization',
 'RootTask_Organization.TrackVeteranTask_Organization.DistributionTask_Organization',
 'RootTask_Organization.TrackVeteranTask_Organization.TrackVeteranTask_Organization.DistributionTask_Organization'}

In [198]:
print(re.search("[A-Z]","RootTask_Organization.TrackVeteranTask_Organization.n"))
print(dir(""))
print(str(['a', 'b']))
print(abbrev("RootTask_Organization.TrackVeteranTask_Organization.n"))

<re.Match object; span=(0, 1), match='R'>
['__add__', '__class__', '__contains__', '__delattr__', '__dir__', '__doc__', '__eq__', '__format__', '__ge__', '__getattribute__', '__getitem__', '__getnewargs__', '__gt__', '__hash__', '__init__', '__init_subclass__', '__iter__', '__le__', '__len__', '__lt__', '__mod__', '__mul__', '__ne__', '__new__', '__reduce__', '__reduce_ex__', '__repr__', '__rmod__', '__rmul__', '__setattr__', '__sizeof__', '__str__', '__subclasshook__', 'capitalize', 'casefold', 'center', 'count', 'encode', 'endswith', 'expandtabs', 'find', 'format', 'format_map', 'index', 'isalnum', 'isalpha', 'isascii', 'isdecimal', 'isdigit', 'isidentifier', 'islower', 'isnumeric', 'isprintable', 'isspace', 'istitle', 'isupper', 'join', 'ljust', 'lower', 'lstrip', 'maketrans', 'partition', 'replace', 'rfind', 'rindex', 'rjust', 'rpartition', 'rsplit', 'rstrip', 'split', 'splitlines', 'startswith', 'strip', 'swapcase', 'title', 'translate', 'upper', 'zfill']
['a', 'b']
RTOTVTO


In [247]:
import re
import sys

def find_appeal(input_f, appeal_id):
    with open(input_f, "r") as f:
        for count, line in enumerate(f):
            if re.search(f"\"appeal_id\":{appeal_id},", line):
                data = json.loads(line)
                return data
            
appeal_id="41852"
appeal=find_appeal("input2.json", appeal_id)
print(appeal['appeal_id']==int(appeal_id), appeal)

True {'appeal_id': 41852, 'docket_type': 'direct_review', 'updated_at': '2019-11-13 19:13:22 UTC', 'tasks': [{'id': 514013, 'status': 'cancelled', 'type': 'RootTask', 'created_at': '2019-11-12T17:14:08.206Z', 'updated_at': '2019-11-12T17:14:08.877Z', 'assigned_to_type': 'Organization', 'parent_id': None}, {'id': 514014, 'status': 'completed', 'type': 'DistributionTask', 'created_at': '2019-11-12T17:14:08.873Z', 'updated_at': '2019-11-12T18:23:27.474Z', 'assigned_to_type': 'Organization', 'parent_id': 514013}, {'id': 514187, 'status': 'cancelled', 'type': 'JudgeAssignTask', 'created_at': '2019-11-12T18:23:27.463Z', 'updated_at': '2019-11-12T18:23:27.463Z', 'assigned_to_type': 'User', 'parent_id': 514013}]}


In [259]:
def gen_plantuml(appeal, limit=200):
    pstr = "@startuml\n"
    taskId2LabelDict={}
    for task in appeal['tasks']:
        taskLabel=f"{len(taskId2LabelDict)}.{task['type']}_{task['assigned_to_type']}"
        taskId2LabelDict[task['id']]=taskLabel
        pstr+=f"object {taskLabel} {colors[task['type']]}\n"
    for task in appeal['tasks']:
        if task['parent_id']:
            pstr+=f"{taskId2LabelDict.get(task['parent_id'])} -- {taskId2LabelDict[task['id']]}\n"
    pstr+="@enduml\n"
    return pstr

print(gen_plantuml(appeal))

@startuml
object 0.RootTask_Organization #66c2a5
object 1.TrackVeteranTask_Organization #8da0cb
object 2.DistributionTask_Organization #fc8d62
object 3.InformalHearingPresentationTask_Organization #ffd92f
object 4.JudgeAssignTask_User #8da0cb
object 5.JudgeDecisionReviewTask_User #66c2a5
object 6.AttorneyTask_User #fc8d62
object 7.BvaDispatchTask_Organization #e5c494
object 8.BvaDispatchTask_User #e5c494
object 9.ReturnedUndeliverableCorrespondenceMailTask_Organization #e78ac3
object 10.ReturnedUndeliverableCorrespondenceMailTask_Organization #e78ac3
object 11.ReturnedUndeliverableCorrespondenceMailTask_User #e78ac3
object 12.Task_Organization #e78ac3
0.RootTask_Organization -- 1.TrackVeteranTask_Organization
0.RootTask_Organization -- 2.DistributionTask_Organization
2.DistributionTask_Organization -- 3.InformalHearingPresentationTask_Organization
0.RootTask_Organization -- 4.JudgeAssignTask_User
0.RootTask_Organization -- 5.JudgeDecisionReviewTask_User
5.JudgeDecisionReviewTask_User -

In [64]:
import seaborn as sns

cp=sns.color_palette("Set2", n_colors=72).as_hex()
colors={
"RootTask":cp[0],
"DistributionTask":cp[1],
"TrackVeteranTask":cp[2],
"HearingTask":cp[3],
"ScheduleHearingTask":cp[4],
"InformalHearingPresentationTask":cp[5],
"BvaDispatchTask":cp[6],
"EvidenceSubmissionWindowTask":cp[7],
"JudgeDecisionReviewTask":cp[8],
"AttorneyTask":cp[9],
"JudgeAssignTask":cp[10],
"HearingAdminActionVerifyAddressTask":cp[11],
"AssignHearingDispositionTask":cp[12],
"EvidenceOrArgumentMailTask":cp[13],
"TranslationTask":cp[14],
"TranscriptionTask":cp[15],
"QualityReviewTask":cp[16],
"OtherColocatedTask":cp[17],
"AttorneyRewriteTask":cp[18],
"TimedHoldTask":cp[19],
"IhpColocatedTask":cp[20],
"AodMotionMailTask":cp[21],
"ScheduleHearingColocatedTask":cp[22],
"VeteranRecordRequest":cp[23],
"HearingRelatedMailTask":cp[24],
"JudgeDispatchReturnTask":cp[25],
"MissingRecordsColocatedTask":cp[26],
"ReturnedUndeliverableCorrespondenceMailTask":cp[27],
"NoShowHearingTask":cp[28],
"GenericTask":cp[29],
"StayedAppealColocatedTask":cp[30],
"ExtensionRequestMailTask":cp[31],
"ExtensionColocatedTask":cp[32],
"FoiaTask":cp[33],
"PowerOfAttorneyRelatedMailTask":cp[34],
"HearingAdminActionOtherTask":cp[35],
"HearingClarificationColocatedTask":cp[36],
"FoiaColocatedTask":cp[37],
"StatusInquiryMailTask":cp[38],
"PoaClarificationColocatedTask":cp[39],
"HearingAdminActionForeignVeteranCaseTask":cp[40],
"PreRoutingFoiaColocatedTask":cp[41],
"JudgeQualityReviewTask":cp[42],
"CongressionalInterestMailTask":cp[43],
"SpecialCaseMovementTask":cp[44],
"PrivacyActTask":cp[45],
"ChangeHearingDispositionTask":cp[46],
"AddressChangeMailTask":cp[47],
"VacateMotionMailTask":cp[48],
"PreRoutingTranslationColocatedTask":cp[49],
"FoiaRequestMailTask":cp[50],
"AttorneyDispatchReturnTask":cp[51],
"AttorneyQualityReviewTask":cp[52],
"ReconsiderationMotionMailTask":cp[53],
"PrivacyActRequestMailTask":cp[54],
"DeathCertificateMailTask":cp[55],
"AojColocatedTask":cp[56],
"AddressVerificationColocatedTask":cp[57],
"TranslationColocatedTask":cp[58],
"Task":cp[59],
"BoardGrantEffectuationTask":cp[60],
"AppealWithdrawalMailTask":cp[61],
"PendingScanningVbmsColocatedTask":cp[62],
"OtherMotionMailTask":cp[63],
"ControlledCorrespondenceMailTask":cp[64],
"UnaccreditedRepColocatedTask":cp[65],
"PulacCerulloTask":cp[66],
"PreRoutingMissingHearingTranscriptsColocatedTask":cp[67],
"NewRepArgumentsColocatedTask":cp[68],
"MissingHearingTranscriptsColocatedTask":cp[69],
"HearingAdminActionFoiaPrivacyRequestTask":cp[70]
}
customcolors={
"assigned":"#cc6600",
"inprogress":"#006600",
"completed":"#0000cc",
}
colors.update(customcolors)

In [None]:
typenames={}
with open('input-all.json') as jf:
    for count, line in enumerate(jf):
        line = jf.readline()
        data = json.loads(line)
        for t in data['tasks']:
            count=typenames.get(t['type'],0)
            typenames[t['type']]=count+1

sortednames=sorted(typenames.items(), key = lambda kv:(kv[1], kv[0]))
sortednames.reverse()
i=0
for name,count in sortednames:
    print("\""+name+"\":cp["+str(i)+"],")
    i+=1


In [None]:
sns.color_palette("Set2", n_colors=72).as_hex()

In [None]:
for key,color in colors.items():
    print("object "+key+" "+color)