# ---------------------------------------------------------#
# astroNN.gaia.downloader: download gaia files
# ---------------------------------------------------------#
import os
import urllib.request
import numpy as np
from astropy.io import fits
import astroNN
from astroNN.gaia.gaia_shared import gaia_env, gaia_default_dr
from astroNN.shared.custom_warnings import deprecated
from astroNN.shared.downloader_tools import TqdmUpTo, md5_checksum
# Working directory captured once at import time (not referenced by the functions visible in this module)
currentdir = os.getcwd()
def tgas(flag=None):
    """
    Get path to the Gaia TGAS DR1 files, download if files not found

    Each of the 16 ``TgasSource_000-000-0XX.fits`` files is checked against the MD5 listed in the server's
    ``MD5SUM.txt`` (downloaded once and kept next to the data files). A file that is missing, fails the check,
    or is forced by ``flag=1`` is downloaded on its own; a bad download is retried a bounded number of times.

    :param flag: None to reuse local files that pass the checksum, 1 to force every file to be downloaded again;
        any other value accepts existing files without verification and only downloads the missing ones
    :type flag: Union[NoneType, int]
    :return: List of file path
    :rtype: list
    :raises OSError: if a file still fails its MD5 check after all download attempts
    :History: 2017-Oct-13 - Written - Henry Leung (University of Toronto)
    """
    # Bounded retries: a persistently bad download must not loop (or recurse) forever
    max_attempts = 3

    folderpath = os.path.join(gaia_env(), 'Gaia/gdr1/tgas_source/fits/')
    urlbase = 'http://cdn.gea.esac.esa.int/Gaia/gdr1/tgas_source/fits/'

    # Make sure the local directory exists
    os.makedirs(folderpath, exist_ok=True)

    hash_filename = 'MD5SUM.txt'
    full_hash_filename = os.path.join(folderpath, hash_filename)
    if not os.path.isfile(full_hash_filename):
        urllib.request.urlretrieve(urlbase + hash_filename, full_hash_filename)
    # After the transpose, row 0 holds the hashes and row 1 the matching file names
    hash_list = np.loadtxt(full_hash_filename, dtype='str').T
    known_hashes = np.atleast_1d(hash_list[0])
    known_names = np.atleast_1d(hash_list[1])

    def expected_md5(name):
        # Return the listed hash as a plain str, or None when the list has no entry for this file.
        # Comparing a str against the raw NumPy selection is ambiguous when it is empty.
        matches = known_hashes[known_names == name]
        return str(matches[0]) if len(matches) > 0 else None

    def download(url, destination, expected):
        # Fetch one file, retrying only that file while its checksum disagrees with the list
        for _ in range(max_attempts):
            with TqdmUpTo(unit='B', unit_scale=True, miniters=1, desc=url.split('/')[-1]) as t:
                urllib.request.urlretrieve(url, destination, reporthook=t.update_to)
            # In some rare case the hash cant be found, then the download cannot be verified and is accepted
            if expected is None or md5_checksum(destination) == expected:
                return
            print('File corruption detected, astroNN attempting to download again')
        raise OSError(f'{destination} still fails its MD5 check after {max_attempts} download attempts')

    fulllist = []
    for i in range(16):
        filename = f'TgasSource_000-000-0{i:02d}.fits'
        fullfilename = os.path.join(folderpath, filename)
        urlstr = urlbase + filename
        file_hash = expected_md5(filename)
        already_there = os.path.isfile(fullfilename)

        if already_there and flag is None:
            checksum = md5_checksum(fullfilename)
            if file_hash is not None and checksum != file_hash:
                print(checksum)
                print(file_hash)
                print('File corruption detected, astroNN attempting to download again')
                download(urlstr, fullfilename, file_hash)
                print(f'Downloaded Gaia DR1 TGAS ({i:d} of 15) file catalog successfully to {fullfilename}')
            else:
                print(fullfilename + ' was found!')
        elif not already_there or flag == 1:
            download(urlstr, fullfilename, file_hash)
            print(f'Downloaded Gaia DR1 TGAS ({i:d} of 15) file catalog successfully to {fullfilename}')
        fulllist.append(fullfilename)

    return fulllist
def tgas_load(cuts=True):
    """
    To load useful parameters from multiple TGAS DR1 files

    The file list comes from ``tgas()``; the selected columns of HDU 1 of every file are joined
    into one array per parameter.

    :param cuts: Whether to cut bad data (negative parallax and percentage error more than 20%, or a custom cut percentage)
    :type cuts: Union[boolean, float]
    :return: Dictionary of parameters with keys ``ra``, ``dec``, ``pmra``, ``pmdec``, ``parallax``,
        ``parallax_err`` and ``gmag``
    :rtype: dict
    :History: 2017-Dec-17 - Written - Henry Leung (University of Toronto)
    """
    # (key in the returned dictionary, column name in the FITS table)
    columns = (('ra', 'RA'), ('dec', 'DEC'), ('pmra', 'PMRA'), ('pmdec', 'PMDEC'),
               ('parallax', 'parallax'), ('parallax_err', 'parallax_error'), ('gmag', 'phot_g_mean_mag'))

    # Each list starts with an empty float64 array so the joined result keeps the dtype of the
    # incremental concatenation it replaces, and so an empty file list still yields empty arrays
    pieces = {key: [np.array([])] for key, _ in columns}
    for path in tgas():
        # Context manager closes the file even if a column is missing
        with fits.open(path) as hdul:
            table = hdul[1].data
            for key, column in columns:
                # Copy, so nothing refers to the file's buffer once it is closed
                pieces[key].append(np.array(table[column]))

    # Join once per parameter instead of re-copying the growing arrays for every file
    data = {key: np.concatenate(parts) for key, parts in pieces.items()}

    if cuts is True or isinstance(cuts, float):
        threshold = 0.2 if isinstance(cuts, bool) else cuts
        # Plain boolean mask; a mask wrapped in a list is read as a 2-D index by recent NumPy and fails
        good = (data['parallax_err'] / data['parallax'] < threshold) & (data['parallax'] > 0.)
        data = {key: values[good] for key, values in data.items()}

    return data
@deprecated
def gaia_source(dr=None, flag=None):
    """
    NAME:
        gaia_source
    PURPOSE:
        download the gaia_source files, verifying each one against the MD5 listed in the server's MD5SUM.txt
    INPUT:
        dr (int): Gaia DR, example dr=1
        flag (int): None: reuse local files that pass the checksum, 1: force to re-download,
                    any other value: accept existing files unverified and only download missing ones
    OUTPUT:
        list of file path
    RAISES:
        ValueError: if the resolved data release is not 1
        OSError: if a file still fails its MD5 check after all download attempts
    HISTORY:
        2017-Oct-13 - Written - Henry Leung (University of Toronto)
        2017-Nov-26 - Update - Henry Leung (University of Toronto)
    """
    dr = gaia_default_dr(dr=dr)
    if dr != 1:
        raise ValueError('gaia_source() only supports Gaia DR1 Gaia Source')

    # Bounded retries: a persistently bad download must not loop (or recurse) forever
    max_attempts = 3

    folderpath = os.path.join(gaia_env(), 'Gaia/gdr1/gaia_source/fits/')
    urlbase = 'http://cdn.gea.esac.esa.int/Gaia/gdr1/gaia_source/fits/'

    # Make sure the local directory exists
    os.makedirs(folderpath, exist_ok=True)

    hash_filename = 'MD5SUM.txt'
    full_hash_filename = os.path.join(folderpath, hash_filename)
    if not os.path.isfile(full_hash_filename):
        urllib.request.urlretrieve(urlbase + hash_filename, full_hash_filename)
    # After the transpose, row 0 holds the hashes and row 1 the matching file names
    hash_list = np.loadtxt(full_hash_filename, dtype='str').T
    known_hashes = np.atleast_1d(hash_list[0])
    known_names = np.atleast_1d(hash_list[1])

    def expected_md5(name):
        # Return the listed hash as a plain str, or None when the list has no entry for this file.
        # Comparing a str against the raw NumPy selection is ambiguous when it is empty.
        matches = known_hashes[known_names == name]
        return str(matches[0]) if len(matches) > 0 else None

    def download(url, destination, expected):
        # Fetch one file, retrying only that file while its checksum disagrees with the list
        for _ in range(max_attempts):
            with TqdmUpTo(unit='B', unit_scale=True, miniters=1, desc=url.split('/')[-1]) as t:
                urllib.request.urlretrieve(url, destination, reporthook=t.update_to)
            # In some rare case the hash cant be found, then the download cannot be verified and is accepted
            if expected is None or md5_checksum(destination) == expected:
                return
            print('File corruption detected, astroNN attempting to download again')
        raise OSError(f'{destination} still fails its MD5 check after {max_attempts} download attempts')

    # Groups 000..019 hold 256 files each, group 020 holds files 000..110
    file_indices = [(j, i) for j in range(20) for i in range(256)] + [(20, i) for i in range(111)]

    fulllist = []
    for j, i in file_indices:
        filename = f'GaiaSource_000-0{j:02d}-{i:03d}.fits'
        urlstr = urlbase + filename
        fullfilename = os.path.join(folderpath, filename)
        file_hash = expected_md5(filename)
        already_there = os.path.isfile(fullfilename)
        # Progress text (including its total) is kept exactly as the message always read
        done_message = (f'Downloaded Gaia DR{dr} Gaia Source ({(j * 256 + i):d} of {(256 * 20 + 112):d}) '
                        f'file catalog successfully to {fullfilename}')

        if already_there and flag is None:
            checksum = md5_checksum(fullfilename)
            if file_hash is not None and checksum != file_hash:
                print(checksum)
                print(file_hash)
                print('File corruption detected, astroNN attempting to download again')
                download(urlstr, fullfilename, file_hash)
                print(done_message)
            else:
                print(fullfilename + ' was found!')
        elif not already_there or flag == 1:
            download(urlstr, fullfilename, file_hash)
            print(done_message)
        fulllist.append(fullfilename)

    return fulllist
@deprecated
def anderson_2017_parallax(cuts=True):
    """
    NAME:
        anderson_2017_parallax
    PURPOSE:
        load pre-compiled Anderson et al 2017 improved parallax from data-driven stars model
    INPUT:
        cuts (boolean): whether to cut those parallax err larger than 20% or not; only entries with
                        parallax_err / parallax < 0.2 are kept (no separate check on the sign of parallax)
    OUTPUT:
        ra (ndarray)
        dec (ndarray)
        parallax (ndarray): parallax in mas
        parallax_err (ndarray): 1-standard deviation parallax error in mas
    HISTORY:
        2017-Dec-22 - Written - Henry Leung (University of Toronto)
    """
    fullfilename = os.path.join(os.path.dirname(astroNN.__path__[0]), 'astroNN', 'data',
                                'anderson_2017_dr14_parallax.npz')
    print('\nOriginal dataset at: http://voms.simonsfoundation.org:50013/8kM7XXPCJleK2M02B9E7YIYmvu5l2rh/ServedFiles/')
    print('Please be advised starting from 26 April 2018, anderson2017 in astroNN was reduced to parallax cross '
          'matched with APOGEE DR14 only')
    print('If you see this message, anderson2017 in this astroNN version is reduced. Moreover, anderson2017 will be '
          'removed in the future\n')

    # The .npz archive keeps a file handle open until closed, so read everything inside a with-block
    with np.load(fullfilename) as hdu:
        ra = hdu['ra']
        dec = hdu['dec']
        parallax = hdu['parallax']
        parallax_err = hdu['parallax_err']

    if cuts is True:
        good_index = np.where(parallax_err / parallax < 0.2)[0]
        ra = ra[good_index]
        dec = dec[good_index]
        parallax = parallax[good_index]
        parallax_err = parallax_err[good_index]

    return ra, dec, parallax, parallax_err
def gaiadr2_parallax(cuts=True, keepdims=False, offset=False):
    """
    Load Gaia DR2 - APOGEE DR14 matches, indices corresponds to APOGEE allstar DR14 file

    :param cuts: Whether to cut bad data (negative parallax and percentage error more than 20%), or a float to set the threshold
    :type cuts: Union[boolean, float]
    :param keepdims: Whether to preserve indices the same as APOGEE allstar DR14, no effect when cuts=False, set to -9999 for bad indices when cuts=True keepdims=True
    :type keepdims: boolean
    :param offset: Whether to correct the Gaia DR2 zero point offset; the correction is added to every parallax that is not -9999

        - False to assume no offset correction
        - True to assume 52.8-4.21(G-12.2) (applied in mas, i.e. 0.0528-0.00421(G-12.2))
        - "leungbovy2019" for leung & bovy 2019 offset correction (only applied where parallax < 2)
        - a float to assume a float offset globally
    :type offset: Union[boolean, float, str]
    :return: numpy array of ra, dec, parallax, parallax_error
    :rtype: ndarrays
    :raises ValueError: if ``offset`` is none of the supported options
    :History: 2018-Apr-26 - Written - Henry Leung (University of Toronto)
    """
    fullfilename = os.path.join(os.path.dirname(astroNN.__path__[0]), 'astroNN', 'data',
                                'gaiadr2_apogeedr14_parallax.npz')
    print('This is Gaia DR2 - APOGEE DR14 matched parallax, RA DEC in J2015.5, parallax in mas')

    # The .npz archive keeps a file handle open until closed, so read everything inside a with-block
    with np.load(fullfilename) as hdu:
        ra = np.array(hdu['RA'])
        dec = np.array(hdu['DEC'])
        parallax = np.array(hdu['parallax'])
        parallax_err = np.array(hdu['parallax_error'])
        gmag = np.array(hdu['g'])

    if cuts is True or isinstance(cuts, float):
        threshold = 0.2 if cuts is True else cuts
        if keepdims is False:
            good_idx = (parallax_err / parallax < threshold) & (parallax > 0.)
            ra = ra[good_idx]
            dec = dec[good_idx]
            parallax = parallax[good_idx]
            parallax_err = parallax_err[good_idx]
            gmag = gmag[good_idx]
        elif keepdims is True:
            print("Moreover, indices correspond to APOGEE allstar DR14 file")
            # Not magic_number because this should be apogee style
            bad_idx = (parallax_err / parallax > threshold) | (parallax < 0.)
            parallax[bad_idx] = -9999.
            parallax_err[bad_idx] = -9999.
    # otherwise no cuts, so do nothing

    # Entries flagged with -9999 never receive an offset
    valid = parallax != -9999.
    if offset is True:
        # 52.8 - 4.21 (G - 12.2) expressed in mas; the magnitude slope was previously applied as
        # 0.0421, ten times the documented value
        parallax[valid] += 0.0528 - 0.00421 * (gmag[valid] - 12.2)
    elif offset is False:
        pass
    elif isinstance(offset, float):
        parallax[valid] += offset
    elif offset == 'leungbovy2019':
        # Quadratic bias in the parallax itself, only applied below 2 mas
        target = valid & (parallax < 2.)
        plx = parallax[target]
        parallax[target] += 0.056 - 0.00574 * plx - 0.0096 * plx ** 2
    else:
        raise ValueError("Unknown offset option")

    return ra, dec, parallax, parallax_err