In [1]:
import json

def compare_json_lists(list1, list2):
    """
    Return the items of ``list1`` that also occur in ``list2``.

    Items are matched with ``==``. The result follows the order of ``list1``;
    an item that appears several times in ``list1`` appears just as often in
    the result, as long as ``list2`` contains at least one equal item.
    """
    # Explicit ``==`` (rather than ``in``) keeps the comparison purely
    # equality-based; ``any`` stops at the first match in ``list2``.
    return [
        candidate
        for candidate in list1
        if any(candidate == other for other in list2)
    ]

def extract_common_chart_properties(chart1, chart2):
    """
    Extract common chart properties from two chart specifications.

    Only the first view (``view[0]``) of each chart and the first mark
    (``mark[0]``) of that view are compared.

    Args:
        chart1: First chart specification (dict). ``task``, ``number_rows``,
            ``view[0]['coordinates']`` and ``view[0]['mark'][0]['type']`` are
            read unconditionally; ``field``, ``encoding``, ``scale`` and
            ``facet`` are optional.
        chart2: Second chart specification, same layout as ``chart1``.

    Returns:
        A dict with ``task`` and ``number_rows``, plus:

        * ``field`` - copied from ``chart1`` when ``chart1`` has one (the two
          charts' fields are only compared when both define ``field``);
        * ``view`` - a dict holding whichever of ``coordinates``, ``mark``
          (a one-element list with the shared ``type`` and/or shared
          ``encoding`` entries), ``scale`` and ``facet`` the two charts have
          in common. The key is omitted when nothing in the view is shared.

    Raises:
        ValueError: If ``task`` or ``number_rows`` differ, or if both charts
            define ``field`` and the values differ.
        KeyError, IndexError: If one of the unconditionally read entries
            listed above is missing from either chart.
    """
    if chart1['task'] != chart2['task'] or chart1['number_rows'] != chart2['number_rows']:
        raise ValueError("Task and number of rows must be the same in both charts")

    if 'field' in chart1 and 'field' in chart2:
        if chart1['field'] != chart2['field']:
            raise ValueError("Fields must be the same in both charts")

    common_properties = {
        'task': chart1['task'],
        'number_rows': chart1['number_rows'],
    }
    # 'field' is optional (see the check above), so only copy it when present.
    if 'field' in chart1:
        common_properties['field'] = chart1['field']

    view1 = chart1['view'][0]
    view2 = chart2['view'][0]
    mark1 = view1['mark'][0]
    mark2 = view2['mark'][0]

    # Collect the shared view parts in their own dict so that every part can
    # be recorded independently of whether the coordinates matched.
    common_view = {}
    if view1['coordinates'] == view2['coordinates']:
        common_view['coordinates'] = view1['coordinates']

    # Same idea for the mark: shared encodings are kept even when the mark
    # types differ.
    common_mark = {}
    if mark1['type'] == mark2['type']:
        common_mark['type'] = mark1['type']

    encoding_common = compare_json_lists(mark1.get('encoding', []), mark2.get('encoding', []))
    if encoding_common:
        common_mark['encoding'] = encoding_common

    if common_mark:
        common_view['mark'] = [common_mark]

    scale_common = compare_json_lists(view1.get('scale', []), view2.get('scale', []))
    if scale_common:
        common_view['scale'] = scale_common

    if 'facet' in view1 and 'facet' in view2 and view1['facet'] == view2['facet']:
        common_view['facet'] = view1['facet']

    # Leave 'view' out entirely when the two views share nothing.
    if common_view:
        common_properties['view'] = common_view

    return common_properties

# Example JSONs
# The two example charts share task, number_rows and field, and both use
# cartesian coordinates with a point mark. They differ in one encoding channel
# ("color" in chart1 vs "size" in chart2, with the matching scale entry) and in
# the field bound to "y" ("n" vs "m"), so only the x encoding and the x/y
# scales are expected to be reported as common.
chart1= {"task": "summary", "number_rows": 30, "field": [{"name": "n", "type": "string", "entropy": 1000, "unique": 10, "interesting": "true"}, {"name": "q1", "type": "number", "entropy": 3843, "unique": 30, "interesting": "true"}, {"name": "q2", "type": "number", "entropy": 3912, "unique": 30}], "view": [{"coordinates": "cartesian", "mark": [{"type": "point", "encoding": [{"channel": "color", "field": "q1"}, {"channel": "x", "field": "q2"}, {"channel": "y", "field": "n"}]}], "scale": [{"channel": "color", "type": "linear", "zero": "true"}, {"channel": "x", "type": "linear", "zero": "true"}, {"channel": "y", "type": "ordinal"}]}]}

chart2= {"task": "summary", "number_rows": 30, "field": [{"name": "n", "type": "string", "entropy": 1000, "unique": 10, "interesting": "true"}, {"name": "q1", "type": "number", "entropy": 3843, "unique": 30, "interesting": "true"}, {"name": "q2", "type": "number", "entropy": 3912, "unique": 30}], "view": [{"coordinates": "cartesian", "mark": [{"type": "point", "encoding": [{"channel": "size", "field": "q1"}, {"channel": "x", "field": "q2"}, {"channel": "y", "field": "m"}]}], "scale": [{"channel": "size", "type": "linear", "zero": "true"}, {"channel": "x", "type": "linear", "zero": "true"}, {"channel": "y", "type": "ordinal"}]}]}


# Run the extraction; a ValueError (raised for incompatible charts, and also
# caught here if json.dumps raises one) is shown as its message instead of a
# JSON document.
try:
    result = extract_common_chart_properties(chart1, chart2)
    result_json = json.dumps(result, indent=2)
except ValueError as e:
    result_json = str(e)

# Printed rather than displayed so the indented JSON keeps its line breaks.
print(result_json)


{
  "task": "summary",
  "number_rows": 30,
  "field": [
    {
      "name": "n",
      "type": "string",
      "entropy": 1000,
      "unique": 10,
      "interesting": "true"
    },
    {
      "name": "q1",
      "type": "number",
      "entropy": 3843,
      "unique": 30,
      "interesting": "true"
    },
    {
      "name": "q2",
      "type": "number",
      "entropy": 3912,
      "unique": 30
    }
  ],
  "view": {
    "coordinates": "cartesian",
    "mark": [
      {
        "type": "point",
        "encoding": [
          {
            "channel": "x",
            "field": "q2"
          }
        ]
      }
    ],
    "scale": [
      {
        "channel": "x",
        "type": "linear",
        "zero": "true"
      },
      {
        "channel": "y",
        "type": "ordinal"
      }
    ]
  }
}


In [3]:
import os
import re
import json

def read_and_process_files(directory, file_pattern, processing_function, output_directory):
    """
    Apply ``processing_function`` to the chart pair stored in each matching file.

    Every file in ``directory`` whose name matches ``file_pattern`` is read and
    searched for ``Chart 1: {...}`` and ``Chart 2: {...}``. Files that do not
    contain both are skipped. The two JSON objects are parsed and passed to
    ``processing_function``; a ``ValueError`` raised by it is recorded as its
    message string instead of a result.

    Args:
        directory: Directory containing the input files.
        file_pattern: Regular expression matched (``re.match``) against each
            file name to select the files to process.
        processing_function: Callable taking ``(chart1, chart2)`` (the parsed
            JSON objects) and returning a JSON-serialisable result.
        output_directory: Directory the per-file results are written to. It is
            created if it does not exist yet.

    Returns:
        A dict mapping each processed file name to its result (or to the
        ``ValueError`` message).

    Side effects:
        For file names of the form ``<digits>_pos*.txt`` the result is written
        as JSON to ``<output_directory>/<digits>.txt``.

    Raises:
        json.JSONDecodeError: If the text captured for a chart is not valid JSON.
    """
    # Sorted so the processing order, and with it the key order of the
    # returned dict, does not depend on the file system's listing order.
    filenames = sorted(os.listdir(directory))

    # Create the destination once, after the source listing succeeded, so the
    # first write does not fail on a fresh checkout. An empty path means
    # "current directory" for os.path.join and needs no creation.
    if output_directory:
        os.makedirs(output_directory, exist_ok=True)

    results = {}
    for filename in filenames:
        if not re.match(file_pattern, filename):
            continue

        # Read the whole file, then release the handle before processing.
        with open(os.path.join(directory, filename), 'r') as file:
            content = file.read()

        # Extracting JSON objects for Chart 1 and Chart 2.
        # NOTE: '.' does not match newlines, so each JSON object has to sit on
        # the same line as its "Chart N: " label.
        chart1_json = re.search(r'Chart 1: (\{.*\})', content)
        chart2_json = re.search(r'Chart 2: (\{.*\})', content)
        if not (chart1_json and chart2_json):
            continue

        chart1 = json.loads(chart1_json.group(1))
        chart2 = json.loads(chart2_json.group(1))

        # Applying the processing function; incompatible pairs are recorded
        # by their error message rather than aborting the whole batch.
        try:
            result = processing_function(chart1, chart2)
        except ValueError as e:
            result = str(e)

        results[filename] = result

        # Only files named "<digits>_pos*.txt" get an output file, named
        # after their leading number.
        match = re.match(r'^(\d+)_pos.*\.txt$', filename)
        if match:
            file_number = match.group(1)
            output_path = os.path.join(output_directory, f'{file_number}.txt')
            with open(output_path, 'w') as output_file:
                output_file.write(json.dumps(result))
    return results

# Only files named "<digits>_pos*.txt" are picked up from the source directory.
file_pattern = r'^\d+_pos.*\.txt$'
# Where the chart-pair files are read from and where the results are written.
source_directory = '../rank/data/example_pairs_to_rank/kim2018'
output_directory = './common_partial_spec/kim2018'

# Extract the common partial specification of every chart pair.
processed_files = read_and_process_files(
    directory=source_directory,
    file_pattern=file_pattern,
    processing_function=extract_common_chart_properties,
    output_directory=output_directory,
)

# Uncomment to inspect the per-file results:
# processed_files

