In [1]:
# import metadata_ingest modules

from metadata_ingest.form_parsers import ITSMetadataQuestionnaire, PDFQuestionnaire
from metadata_ingest.socrata_ingestor import SocrataDataset


# Relative path to the sample fillable questionnaire PDF.
# `fp` is reused by both parser cells and by the SocrataDataset cells below.
fp = 'forms/ITSJPO_MetadataQuestionnaire_fillable_sample.pdf'

## Run generic PDF parser

In [2]:
# Echo the file being parsed so the recorded output is self-describing.
print('Parsing {} using generic PDF parser'.format(fp))
# Parse the form with the generic parser imported from metadata_ingest.form_parsers.
questionnaire = PDFQuestionnaire(fp)

print('===== Content ====')
# Bare trailing expression: Jupyter renders `content` as the cell result.
# In the recorded output, `distribution` is a single dict and `contactPoint`
# maps each role to one "Name: email" string (comma-separated when several).
questionnaire.content

Parsing forms/ITSJPO_MetadataQuestionnaire_fillable_sample.pdf using generic PDF parser
===== Content ====


{'title': 'Sample Dataset Title',
 'accessLevel': 'Restricted Public',
 'rights': 'Sample text.',
 'describedByAccessLevel': 'Public',
 'describedByRights': 'N/A',
 'distribution': {'downloadURL': 'N/A',
  'accessURL': 'N/A',
  'describedBy': 'N/A',
  'describedByType': 'N/A'},
 'contactPoint': {'dataOwner': 'Jane Doe: jane@dot.gov',
  'dataSteward': 'ITS JPO: data.itsjpo@dot.gov, Joan Smith: joan@dot.gov',
  'federalSponsor': 'John Smith: john@dot.gov'},
 'creator': 'Wyoming Department of Transportation (WYDOT)',
 'publisher': {'name': 'USDOT'},
 'bureauCode': '021:15',
 'programCode': '021:013',
 'license': 'https://creativecommons.org/licenses/by-sa/4.0/',
 'description': 'Sample description of the dataset.',
 'spatial': 'Cambridge, MA',
 'temporal': '2018-01-01/2018-12-31',
 'accrualPeriodicity': 'R/P1D',
 'theme': 'Automobiles, Railroads',
 'keyword': 'tag1, tag2, tag3',
 'version': '1.0',
 'issued': '2019-01-01',
 'landingPage': 'http://its.dot.gov/data/',
 'identifiersExtended':

## Run ITS DataHub Metadata Questionnaire PDF parser

In [3]:
# NOTE(review): "generic" in this message looks copy-pasted from the previous
# cell; this cell uses the ITS-specific parser. Confirm before rewording, since
# the recorded output echoes the string.
print('Parsing {} using generic ITS DataHub Metadata Questionnaire PDF parser'.format(fp))
# Parse the same file with the ITS questionnaire parser; `mq` is reused by the
# metadata-formatting cell further down.
mq = ITSMetadataQuestionnaire(fp)

print('===== Content ====')
# Bare trailing expression: Jupyter renders `content` as the cell result.
# Unlike the generic parser's recorded output, `distribution` is a list here and
# `contactPoint` is a list of {'fn', 'hasEmail', 'hasRole'} records, one per person.
mq.content

Parsing forms/ITSJPO_MetadataQuestionnaire_fillable_sample.pdf using generic ITS DataHub Metadata Questionnaire PDF parser
===== Content ====


{'title': 'Sample Dataset Title',
 'accessLevel': 'Restricted Public',
 'rights': 'Sample text.',
 'describedByAccessLevel': 'Public',
 'describedByRights': 'N/A',
 'distribution': [{'downloadURL': 'N/A',
   'accessURL': 'N/A',
   'describedBy': 'N/A',
   'describedByType': 'N/A'}],
 'contactPoint': [{'fn': 'Jane Doe',
   'hasEmail': 'jane@dot.gov',
   'hasRole': 'dataOwner'},
  {'fn': 'ITS JPO',
   'hasEmail': 'data.itsjpo@dot.gov',
   'hasRole': 'dataSteward'},
  {'fn': 'Joan Smith', 'hasEmail': 'joan@dot.gov', 'hasRole': 'dataSteward'},
  {'fn': 'John Smith',
   'hasEmail': 'john@dot.gov',
   'hasRole': 'federalSponsor'}],
 'creator': 'Wyoming Department of Transportation (WYDOT)',
 'publisher': {'name': 'USDOT'},
 'bureauCode': '021:15',
 'programCode': '021:013',
 'license': 'https://creativecommons.org/licenses/by-sa/4.0/',
 'description': 'Sample description of the dataset.',
 'spatial': 'Cambridge, MA',
 'temporal': '2018-01-01/2018-12-31',
 'accrualPeriodicity': 'R/P1D',
 'the

## Format metadata to fit Socrata's schema

In [4]:
# Depends on `mq` from the ITS questionnaire parser cell above.
print('===== DTG metadataUpsert ====')
# Displays the questionnaire content reshaped for Socrata. The recorded result
# has the keys name, attribution, description, privateMetadata, tags,
# customFields['Common Core'] and category.
mq.generate_dtg_metadata()

===== DTG metadataUpsert ====


{'name': 'Sample Dataset Title',
 'attribution': 'U.S. Department of Transportation Intelligent Transportation Systems Joint Program Office (JPO)',
 'description': 'Sample description of the dataset.',
 'privateMetadata': {'contactEmail': 'RDAE_Support@bah.com'},
 'tags': ['tag1',
  'tag3',
  'intelligent transportation systems (its)',
  'its joint program office (jpo)',
  'tag2'],
 'customFields': {'Common Core': {'Contact Email': 'data.itsjpo@dot.gov',
   'Contact Name': 'ITS JPO',
   'Language': 'English',
   'Update Frequency': 'R/P1D',
   'License': 'Other',
   'Program Code': '021:013',
   'Bureau Code': '021:15',
   'Geographic Coverage': 'Cambridge, MA',
   'Publisher': 'USDOT',
   'Temporal Applicability': '2018-01-01/2018-12-31',
   'Is Quality Data': 'True',
   'Public Access Level': 'restricted public',
   'Homepage': 'http://its.dot.gov/data/'}},
 'category': 'Automobiles'}

## Create new dataset in Socrata using content from a questionnaire

In [5]:
# Build a SocrataDataset from the questionnaire file. No uuid is passed because
# the dataset does not exist yet; `dataset.uuid` is read after creation below.
dataset = SocrataDataset(uuid=None, mq_fp=fp)

In [6]:
# Side effect: per the recorded output, this creates a real dataset on
# datahub.transportation.gov, printing job progress lines and then the new URL.
# NOTE(review): re-running this cell presumably creates another dataset; confirm.
dataset.create_dataset()

Job progress: initializing
Job progress: initializing
Job progress: initializing
Job progress: initializing
Job progress: initializing
Job progress: finishing
Job progress: finishing
Job progress: finishing
Job progress: finishing
Job progress: successful
Dataset has been created at https://datahub.transportation.gov/d/xd74-fkkc


In [8]:
# ID of the newly created dataset; it matches the last path segment of the URL
# printed by create_dataset() in the recorded output.
# NOTE(review): the execution counter jumps from 6 to 8, so a cell appears to
# have been run and removed; re-run on a fresh kernel to confirm nothing is missing.
dataset.uuid

'xd74-fkkc'

## Update existing dataset in Socrata using content from a questionnaire

In [9]:
# Target the dataset created above by passing its uuid, using the same
# questionnaire file as the metadata source.
updated_dataset = SocrataDataset(uuid=dataset.uuid, mq_fp=fp)
# Side effect: updates metadata on the live portal; the recorded output prints
# the URL of the updated dataset.
updated_dataset.update_metadata()

Metadata updated for https://datahub.transportation.gov/d/xd74-fkkc
