In [None]:
#Application of frictionless principles to the 'Texas_NewspaperTitleList_final' 
#output is JSON file describing the Newspaper List csv file

#Please download frictionless at frictionlessdata.io/software/#software-toolkit

In [10]:
#import frictionless packages
import frictionless
from frictionless import describe

import pprint
pp = pprint.PrettyPrinter(depth=4)

In [12]:
#'describe' datasets and print
TX_CountyIDs = describe('https://raw.githubusercontent.com/unt-libraries/portal-leading/main/data/TX-County_Identifier/TX-County_Identifiers.csv')

pp.pprint(TX_CountyIDs)

{'encoding': 'utf-8',
 'format': 'csv',
 'hashing': 'md5',
 'name': 'tx-county_identifiers',
 'path': 'https://raw.githubusercontent.com/unt-libraries/portal-leading/main/data/TX-County_Identifier/TX-County_Identifiers.csv',
 'profile': 'tabular-data-resource',
 'schema': {'fields': [{'name': 'County Name', 'type': 'string'},
                       {'name': 'FIPS Code - State', 'type': 'integer'},
                       {'name': 'FIPS Code - County', 'type': 'integer'},
                       {'name': 'geonames ID', 'type': 'integer'},
                       {'name': 'wikidata ID', 'type': 'string'},
                       {'name': 'UNTL County String', 'type': 'string'},
                       {'name': 'UNTL Place Name String', 'type': 'string'}]},
 'scheme': 'https'}


In [13]:
#applying more detail to each column
TX_CountyIDs.schema.get_field("County Name").title = "County Name"
TX_CountyIDs.schema.get_field("County Name").description = "Name of all Counties in Texas, n=254"

TX_CountyIDs.schema.get_field("FIPS Code - State").title = "FIPS Code - State"
TX_CountyIDs.schema.get_field("FIPS Code - State").description = "Federal Information Processing Statndards (FIPS) code for the State of Texas"

TX_CountyIDs.schema.get_field("FIPS Code - County").title = "FIPS Code - County"
TX_CountyIDs.schema.get_field("FIPS Code - County").description = "Federal Information Processing Statndards (FIPS) code for Texas Counties"

TX_CountyIDs.schema.get_field("geonames ID").title = "geonames ID"
TX_CountyIDs.schema.get_field("geonames ID").description = "Geonames Identifier as documented by https://www.geonames.org/"

TX_CountyIDs.schema.get_field("wikidata ID").title = "wikidata ID"
TX_CountyIDs.schema.get_field("wikidata ID").description = "Wikidata Identifier as documented by https://www.wikidata.org/Wikidata:Identifiers"

TX_CountyIDs.schema.get_field("UNTL County String").title = "UNTL County String"
TX_CountyIDs.schema.get_field("UNTL County String").description = "University of North Texas Library (UNTL) County String Identifier, formatted as '[County Name], TX'"

TX_CountyIDs.schema.get_field("UNTL Place Name String").title = "UNTL Place Name String"
TX_CountyIDs.schema.get_field("UNTL Place Name String").description = "University of North Texas Library (UNTL) Place Name String Identifier, formatted as 'United States - Texas - [County Name]'"

pp.pprint(TX_CountyIDs)

{'encoding': 'utf-8',
 'format': 'csv',
 'hashing': 'md5',
 'name': 'tx-county_identifiers',
 'path': 'https://raw.githubusercontent.com/unt-libraries/portal-leading/main/data/TX-County_Identifier/TX-County_Identifiers.csv',
 'profile': 'tabular-data-resource',
 'schema': {'fields': [{'description': 'Name of all Counties in Texas, n=254',
                        'name': 'County Name',
                        'title': 'County Name',
                        'type': 'string'},
                       {'description': 'Federal Information Processing '
                                       'Statndards (FIPS) code for the State '
                                       'of Texas',
                        'name': 'FIPS Code - State',
                        'title': 'FIPS Code - State',
                        'type': 'integer'},
                       {'description': 'Federal Information Processing '
                                       'Statndards (FIPS) code for Texas '
                   

In [14]:
#apply constraint field descriptors to the tables
TX_CountyIDs.schema.get_field("FIPS Code - State").constraints["enum"] = ['48']

pp.pprint(TX_CountyIDs)

{'encoding': 'utf-8',
 'format': 'csv',
 'hashing': 'md5',
 'name': 'tx-county_identifiers',
 'path': 'https://raw.githubusercontent.com/unt-libraries/portal-leading/main/data/TX-County_Identifier/TX-County_Identifiers.csv',
 'profile': 'tabular-data-resource',
 'schema': {'fields': [{'description': 'Name of all Counties in Texas, n=254',
                        'name': 'County Name',
                        'title': 'County Name',
                        'type': 'string'},
                       {'constraints': {...},
                        'description': 'Federal Information Processing '
                                       'Statndards (FIPS) code for the State '
                                       'of Texas',
                        'name': 'FIPS Code - State',
                        'title': 'FIPS Code - State',
                        'type': 'integer'},
                       {'description': 'Federal Information Processing '
                                       'Statndar

In [15]:
#handling missing values
TX_CountyIDs.schema.missing_values = [""]
pp.pprint(TX_CountyIDs)

{'encoding': 'utf-8',
 'format': 'csv',
 'hashing': 'md5',
 'name': 'tx-county_identifiers',
 'path': 'https://raw.githubusercontent.com/unt-libraries/portal-leading/main/data/TX-County_Identifier/TX-County_Identifiers.csv',
 'profile': 'tabular-data-resource',
 'schema': {'fields': [{'description': 'Name of all Counties in Texas, n=254',
                        'name': 'County Name',
                        'title': 'County Name',
                        'type': 'string'},
                       {'constraints': {...},
                        'description': 'Federal Information Processing '
                                       'Statndards (FIPS) code for the State '
                                       'of Texas',
                        'name': 'FIPS Code - State',
                        'title': 'FIPS Code - State',
                        'type': 'integer'},
                       {'description': 'Federal Information Processing '
                                       'Statndar

In [16]:
#add table description
TX_CountyIDs.description = "The FIPS Code, geonames, and wikidata identifiers, and string formats used by the University of North Texas Libraries for all 254 Counties in Texaas. Data was manually gathered from several open online repositiories identified in the descriptions of applicable variables / columns."
pp.pprint(TX_CountyIDs)

{'description': 'The FIPS Code, geonames, and wikidata identifiers, and string '
                'formats used by the University of North Texas Libraries for '
                'all 254 Counties in Texaas. Data was manually gathered from '
                'several open online repositiories identified in the '
                'descriptions of applicable variables / columns.',
 'encoding': 'utf-8',
 'format': 'csv',
 'hashing': 'md5',
 'name': 'tx-county_identifiers',
 'path': 'https://raw.githubusercontent.com/unt-libraries/portal-leading/main/data/TX-County_Identifier/TX-County_Identifiers.csv',
 'profile': 'tabular-data-resource',
 'schema': {'fields': [{'description': 'Name of all Counties in Texas, n=254',
                        'name': 'County Name',
                        'title': 'County Name',
                        'type': 'string'},
                       {'constraints': {...},
                        'description': 'Federal Information Processing '
                         

In [17]:
#print JSON file
TX_CountyIDs.to_json("TX_CountyIDs.json")

'{\n  "path": "https://raw.githubusercontent.com/unt-libraries/portal-leading/main/data/TX-County_Identifier/TX-County_Identifiers.csv",\n  "name": "tx-county_identifiers",\n  "profile": "tabular-data-resource",\n  "scheme": "https",\n  "format": "csv",\n  "hashing": "md5",\n  "encoding": "utf-8",\n  "schema": {\n    "fields": [\n      {\n        "name": "County Name",\n        "type": "string",\n        "title": "County Name",\n        "description": "Name of all Counties in Texas, n=254"\n      },\n      {\n        "name": "FIPS Code - State",\n        "type": "integer",\n        "title": "FIPS Code - State",\n        "description": "Federal Information Processing Statndards (FIPS) code for the State of Texas",\n        "constraints": {\n          "enum": [\n            "48"\n          ]\n        }\n      },\n      {\n        "name": "FIPS Code - County",\n        "type": "integer",\n        "title": "FIPS Code - County",\n        "description": "Federal Information Processing Statnd

In [9]:
ls

 Volume in drive C has no label.
 Volume Serial Number is 3A43-426E

 Directory of C:\Users\Dream Machine

10/18/2021  12:08 PM    <DIR>          .
10/18/2021  12:08 PM    <DIR>          ..
02/16/2020  06:16 PM    <DIR>          .anaconda
07/18/2021  02:01 PM    <DIR>          .atom
05/25/2021  05:27 PM    <DIR>          .conda
10/17/2021  10:42 PM                60 .condarc
02/16/2020  06:16 PM    <DIR>          .config
09/02/2021  10:07 PM               177 .gitconfig
12/13/2019  10:57 AM    <DIR>          .idlerc
10/18/2021  11:12 AM    <DIR>          .ipynb_checkpoints
02/16/2020  06:16 PM    <DIR>          .ipython
10/18/2021  11:03 AM    <DIR>          .jupyter
02/16/2020  06:16 PM    <DIR>          .matplotlib
05/18/2019  10:54 AM    <DIR>          .Origin
10/17/2021  08:48 PM    <DIR>          .PyCharmCE2019.3
05/18/2019  10:54 AM    <DIR>          .QtWebEngineProcess
02/19/2020  09:40 PM    <DIR>          .spyder-py3
02/09/2020  05:29 PM    <DIR>          .thumbnails
08/18/202