# Crop Science Warehouse Workspace

Set the CSW project and the output file you want to use. The output path may point to a plain-text or compressed (e.g. `.gz`) file, stored either locally or in S3:

In [None]:
# Google Cloud project handed to the BigQuery client as its `project` argument.
project="bcs-breeding-datasets"
# Destination of the tab-separated query results; it is opened for writing
# with smart_open in the final cell.
output_file="~/cotton_restricted_or_ambig_tou_inventory.tsv.gz"
import time

Run the block below to authenticate. If prompted, click the link, log into Google using your Bayer email, and then copy and paste the provided token into the text field.

In [None]:
from google.cloud import bigquery
import smart_open
import pydata_google_auth
# Request user credentials restricted to the BigQuery OAuth scope; the
# resulting object is passed to the BigQuery client in a later cell.
credentials = pydata_google_auth.get_user_credentials(
    scopes=['https://www.googleapis.com/auth/bigquery'],
)

Modify the query string below as needed

In [None]:
# SQL text submitted to BigQuery by the query cell below. Edit the table,
# filters and selected columns as needed; the selected columns become the
# columns of the tab-separated output file.
sql_query = '''
select haps.Position, haps.AncestralCall 
from `bcs-breeding-datasets.breeding_genomics.ancestralHaplotypes_maize` as haps 
where haps.Line='JENU381'
  and haps.Chromosome=1
  and haps.Position>=100
  and haps.Position<=120
order by haps.Position
		        
'''

Run the query, wait for it to finish, and obtain an iterator over the result rows

In [None]:
# Build a BigQuery client bound to the configured project and the user
# credentials obtained above.
client = bigquery.Client(project=project, credentials=credentials)

# Submit the query and block until the job completes; `results` is an
# iterator over the returned rows.
results = (
    client
    .query(sql_query)
    .result()
)

Write the results to the output file

In [None]:
# Stream the query results into `output_file` as tab-separated text.
#
# - The file is opened in a `with` block so it is flushed and closed even if
#   iteration or a write fails part-way through.
# - The header row comes from the result schema, so it is written even when
#   the query returns no rows.
# - Progress is reported once per 5% step using integer arithmetic, and the
#   carriage return keeps the progress on a single line.
next_report = 5  # next percentage (multiple of 5) at which to report progress
with smart_open.open(output_file, 'w') as writer:
    writer.write("\t".join(field.name for field in results.schema) + "\n")
    for i, row in enumerate(results, start=1):
        writer.write("\t".join(map(str, row.values())) + "\n")
        # total_rows is read inside the loop because it may only be known
        # once the first page of results has been fetched.
        total_rows = results.total_rows
        if total_rows:
            percent = 100 * i // total_rows
            if percent >= next_report:
                print(str(percent) + "%", end="\r")
                # Advance to the next multiple of 5 above the current value.
                next_report = percent - percent % 5 + 5
print()  # finish the progress line