# ---------------------------------------------------------#
# astroNN.gaia.downloader: download gaia files
# ---------------------------------------------------------#
import os
import urllib.request
import numpy as np
from astropy.io import fits
import astroNN
import astroNN.data
from astroNN.gaia.gaia_shared import gaia_env, gaia_default_dr
from astroNN.shared.warnings import deprecated
from astroNN.shared.downloader_tools import TqdmUpTo, filehash
# Working directory captured once at import time (not referenced elsewhere in the visible part of this file)
currentdir = os.getcwd()
def tgas(flag=None):
    """
    Get path to the Gaia TGAS DR1 files, download if files not found

    Files already on disk are verified against the checksums listed in ``MD5SUM.txt`` (downloaded next to the
    data files when missing). A file failing verification is downloaded again, a bounded number of times.

    :param flag: None to verify files already on disk and download the missing ones, 1 to force a re-download of
        every file; with any other value missing files are downloaded but existing files are not verified
    :type flag: Union[NoneType, int]
    :return: List of file path
    :rtype: list
    :raises OSError: if a file still fails MD5 verification after several download attempts
    :History: 2017-Oct-13 - Written - Henry Leung (University of Toronto)
    """
    # Upper bound on downloads of a single file, so a persistently corrupted download cannot loop forever
    max_attempts = 3
    corruption_msg = "File corruption detected, astroNN is attempting to download again"

    fulllist = []
    folderpath = os.path.join(gaia_env(), "Gaia/gdr1/tgas_source/fits/")
    urlbase = "http://cdn.gea.esac.esa.int/Gaia/gdr1/tgas_source/fits/"
    os.makedirs(folderpath, exist_ok=True)

    hash_filename = "MD5SUM.txt"
    full_hash_filename = os.path.join(folderpath, hash_filename)
    if not os.path.isfile(full_hash_filename):
        urllib.request.urlretrieve(urlbase + hash_filename, full_hash_filename)

    # Each row is read as (md5, filename); atleast_2d keeps a single-line file iterable row by row
    hash_table = np.atleast_2d(np.loadtxt(full_hash_filename, dtype="str"))
    known_md5 = {str(row[1]): str(row[0]) for row in hash_table if len(row) >= 2}

    for i in range(16):
        filename = f"TgasSource_000-000-0{i:02d}.fits"
        fullfilename = os.path.join(folderpath, filename)
        urlstr = urlbase + filename
        # In some rare case the hash can't be found; None then means "nothing to verify against"
        expected_md5 = known_md5.get(filename)

        file_exists = os.path.isfile(fullfilename)
        needs_download = (not file_exists) or flag == 1

        if file_exists and flag is None:
            checksum = filehash(fullfilename, algorithm="md5")
            if expected_md5 is not None and checksum != expected_md5:
                print(checksum)
                print(expected_md5)
                print(corruption_msg)
                needs_download = True
            else:
                print(fullfilename + " was found!")

        if needs_download:
            for _attempt in range(max_attempts):
                # progress bar
                with TqdmUpTo(
                    unit="B", unit_scale=True, miniters=1, desc=filename
                ) as t:
                    urllib.request.urlretrieve(
                        urlstr, fullfilename, reporthook=t.update_to
                    )
                if (
                    expected_md5 is None
                    or filehash(fullfilename, algorithm="md5") == expected_md5
                ):
                    break
                print(corruption_msg)
            else:
                # Only this file is retried (not the whole catalogue), and only max_attempts times
                raise OSError(
                    f"{fullfilename} failed MD5 verification after {max_attempts} download attempts"
                )
            print(
                f"Downloaded Gaia DR1 TGAS ({i:d} of 15) file catalog successfully to {fullfilename}"
            )

        fulllist.append(fullfilename)

    return fulllist
def tgas_load(cuts=True):
    """
    To load useful parameters from multiple TGAS DR1 files

    :param cuts: Whether to cut bad data (negative parallax and percentage error more than 20%, or a custom cut percentage)
    :type cuts: Union[boolean, float]
    :return: Dictionary of parameters with keys ``ra``, ``dec``, ``pmra``, ``pmdec``, ``parallax``,
        ``parallax_err`` and ``gmag``
    :rtype: dict
    :History: 2017-Dec-17 - Written - Henry Leung (University of Toronto)
    """
    # output key -> column name read from the first table extension of each TGAS file
    columns = {
        "ra": "RA",
        "dec": "DEC",
        "pmra": "PMRA",
        "pmdec": "PMDEC",
        "parallax": "parallax",
        "parallax_err": "parallax_error",
        "gmag": "phot_g_mean_mag",
    }

    # Every list starts with an empty float array so the concatenated dtype (and the result for an empty
    # file list) is the same as when growing from np.array([])
    pieces = {key: [np.array([])] for key in columns}

    for path in tgas():
        # The context manager closes the file even if reading a column fails
        with fits.open(path) as hdulist:
            table = hdulist[1].data
            for key, column in columns.items():
                # copy while the file is still open so nothing refers to it after closing
                pieces[key].append(np.array(table[column]))

    # Concatenate once per column instead of re-allocating the full array for every file
    data = {key: np.concatenate(parts) for key, parts in pieces.items()}

    if cuts is True or isinstance(cuts, float):
        threshold = 0.2 if isinstance(cuts, bool) else cuts
        # Index with the boolean mask itself: wrapping it in a list is no longer
        # interpreted as a mask by NumPy and raises IndexError
        good = (data["parallax_err"] / data["parallax"] < threshold) & (
            data["parallax"] > 0.0
        )
        data = {key: values[good] for key, values in data.items()}

    return data
@deprecated
def gaia_source(dr=None, flag=None):
    """
    NAME:
        gaia_source
    PURPOSE:
        download the gaia_source files, verifying them against the MD5SUM.txt published with the data
    INPUT:
        dr (int): Gaia DR, example dr=1
        flag (int): None: verify existing files and download missing ones, 1: force to re-download,
                    any other value: download missing files without verifying existing ones
    OUTPUT:
        list of file path
    RAISES:
        ValueError: if the resolved data release is not 1
        OSError: if a file still fails MD5 verification after several download attempts
    HISTORY:
        2017-Oct-13 - Written - Henry Leung (University of Toronto)
        2017-Nov-26 - Update - Henry Leung (University of Toronto)
    """
    dr = gaia_default_dr(dr=dr)
    if dr != 1:
        raise ValueError("gaia_source() only supports Gaia DR1 Gaia Source")

    # Upper bound on downloads of a single file, so a persistently corrupted download cannot loop forever
    max_attempts = 3
    corruption_msg = "File corruption detected, astroNN is attempting to download again"
    # Total shown in the progress message, kept exactly as in the original message
    total_label = 256 * 20 + 112

    fulllist = []
    folderpath = os.path.join(gaia_env(), "Gaia/gdr1/gaia_source/fits/")
    urlbase = "http://cdn.gea.esac.esa.int/Gaia/gdr1/gaia_source/fits/"
    os.makedirs(folderpath, exist_ok=True)

    hash_filename = "MD5SUM.txt"
    full_hash_filename = os.path.join(folderpath, hash_filename)
    if not os.path.isfile(full_hash_filename):
        urllib.request.urlretrieve(urlbase + hash_filename, full_hash_filename)

    # Each row is read as (md5, filename); atleast_2d keeps a single-line file iterable row by row
    hash_table = np.atleast_2d(np.loadtxt(full_hash_filename, dtype="str"))
    known_md5 = {str(row[1]): str(row[0]) for row in hash_table if len(row) >= 2}

    # Groups 000..019 hold 256 files each, the last group (020) holds files 000..110
    filenames = [
        f"GaiaSource_000-0{j:02d}-{i:03d}.fits"
        for j in range(20)
        for i in range(256)
    ]
    filenames.extend(f"GaiaSource_000-020-{i:03d}.fits" for i in range(111))

    for index, filename in enumerate(filenames):
        urlstr = urlbase + filename
        fullfilename = os.path.join(folderpath, filename)
        # In some rare case the hash can't be found; None then means "nothing to verify against"
        expected_md5 = known_md5.get(filename)

        file_exists = os.path.isfile(fullfilename)
        needs_download = (not file_exists) or flag == 1

        if file_exists and flag is None:
            checksum = filehash(fullfilename, algorithm="md5")
            if expected_md5 is not None and checksum != expected_md5:
                print(checksum)
                print(expected_md5)
                print(corruption_msg)
                needs_download = True
            else:
                print(fullfilename + " was found!")

        if needs_download:
            for _attempt in range(max_attempts):
                # progress bar
                with TqdmUpTo(
                    unit="B", unit_scale=True, miniters=1, desc=filename
                ) as t:
                    urllib.request.urlretrieve(
                        urlstr, fullfilename, reporthook=t.update_to
                    )
                if (
                    expected_md5 is None
                    or filehash(fullfilename, algorithm="md5") == expected_md5
                ):
                    break
                print(corruption_msg)
            else:
                # Only this file is retried (not the whole catalogue), and only max_attempts times
                raise OSError(
                    f"{fullfilename} failed MD5 verification after {max_attempts} download attempts"
                )
            print(
                f"Downloaded Gaia DR{dr} Gaia Source ({index:d} of {total_label:d}) "
                f"file catalog successfully to {fullfilename}"
            )

        fulllist.append(fullfilename)

    return fulllist
@deprecated
def anderson_2017_parallax(cuts=True):
    """
    NAME:
        anderson_2017_parallax
    PURPOSE:
        load pre-compiled Anderson et al 2017 improved parallax from data-driven stars model
    INPUT:
        cuts (boolean): whether to cut those parallax err larger than 20% or not
    OUTPUT:
        ra (ndarray)
        dec (ndarray)
        parallax (ndarray): parallax in mas
        parallax_err (ndarray): 1-standard deviation parallax error in mas
    HISTORY:
        2017-Dec-22 - Written - Henry Leung (University of Toronto)
    """
    fullfilename = os.path.join(
        astroNN.data.datapath(), "anderson_2017_dr14_parallax.npz"
    )
    print(
        "\nOriginal dataset at: http://voms.simonsfoundation.org:50013/8kM7XXPCJleK2M02B9E7YIYmvu5l2rh/ServedFiles/"
    )
    print(
        "Please be advised starting from 26 April 2018, anderson2017 in astroNN was reduced to parallax cross "
        "matched with APOGEE DR14 only"
    )
    print(
        "If you see this message, anderson2017 in this astroNN version is reduced. Moreover, anderson2017 will be "
        "removed in the future\n"
    )

    # The npz archive keeps a file handle open until closed; read everything inside the with block
    with np.load(fullfilename) as hdu:
        ra = hdu["ra"]
        dec = hdu["dec"]
        parallax = hdu["parallax"]
        parallax_err = hdu["parallax_err"]

    if cuts is True:
        # NOTE(review): only the fractional error is tested here; unlike gaiadr2_parallax, rows with a
        # negative parallax are kept (their ratio is below 0.2) - confirm this is intended
        good_index = np.where(parallax_err / parallax < 0.2)[0]
        ra = ra[good_index]
        dec = dec[good_index]
        parallax = parallax[good_index]
        parallax_err = parallax_err[good_index]

    return ra, dec, parallax, parallax_err
@deprecated
def gaiadr2_parallax(cuts=True, keepdims=False, offset=False):
    """
    Load Gaia DR2 - APOGEE DR14 matches, indices corresponds to APOGEE allstar DR14 file

    :param cuts: Whether to cut bad data (negative parallax and percentage error more than 20%), or a float to set the threshold
    :type cuts: Union[boolean, float]
    :param keepdims: Whether to preserve indices the same as APOGEE allstar DR14, no effect when cuts=False, set to -9999 for bad indices when cuts=True keepdims=True
    :type keepdims: boolean
    :param offset: Whether to correction Gaia DR2 zero point offset

                    - False to assume no offset correction
                    - True to assume 52.8-4.21(G-12.2) in micro-arcsecond (applied in mas)
                    - "leungbovy2019" for leung & bovy 2019 offset correction
                    - a number to assume that offset (in mas) globally
    :type offset: Union[boolean, float, str]
    :return: numpy array of ra, dec, parallax, parallax_error
    :rtype: ndarrays
    :raises ValueError: if ``offset`` is not one of the supported options
    :History: 2018-Apr-26 - Written - Henry Leung (University of Toronto)
    """
    fullfilename = os.path.join(
        astroNN.data.datapath(), "gaiadr2_apogeedr14_parallax.npz"
    )
    print(
        "This is Gaia DR2 - APOGEE DR14 matched parallax, RA DEC in J2015.5, parallax in mas"
    )

    # The npz archive keeps a file handle open until closed; copy everything out inside the with block
    with np.load(fullfilename) as hdu:
        ra = np.array(hdu["RA"])
        dec = np.array(hdu["DEC"])
        parallax = np.array(hdu["parallax"])
        parallax_err = np.array(hdu["parallax_error"])
        gmag = np.array(hdu["g"])

    if cuts is True or isinstance(cuts, float):
        threshold = 0.2 if cuts is True else cuts
        good_idx = (parallax_err / parallax < threshold) & (parallax > 0.0)
        if keepdims:
            print("Moreover, indices correspond to APOGEE allstar DR14 file")
            # Not magic_number because this should be apogee style.
            # Flag exactly the rows the keepdims=False branch would drop (including NaN rows),
            # so both modes agree on what "bad" means
            bad_idx = ~good_idx
            parallax[bad_idx] = -9999.0
            parallax_err[bad_idx] = -9999.0
        else:
            ra = ra[good_idx]
            dec = dec[good_idx]
            parallax = parallax[good_idx]
            parallax_err = parallax_err[good_idx]
            gmag = gmag[good_idx]
    # otherwise: no cuts so do nothing

    # Rows carrying the -9999 sentinel must never receive an offset
    valid = parallax != -9999.0

    if offset is True:
        # 52.8 - 4.21 (G - 12.2) micro-arcsecond expressed in mas; the slope is 0.00421 mas per mag
        # (it was previously written as 0.0421, ten times the documented value)
        parallax[valid] += 0.0528 - 0.00421 * (gmag[valid] - 12.2)
    elif offset is False:
        pass
    elif isinstance(offset, (int, float)):
        # booleans were handled above, so this is a plain numeric global offset
        parallax[valid] += offset
    elif isinstance(offset, str) and offset == "leungbovy2019":

        def bias(x):
            # quadratic offset model as a function of the observed parallax
            return 0.056 - 0.00574 * x - 0.0096 * x**2

        # the correction is only applied to parallaxes below 2 mas
        target = valid & (parallax < 2.0)
        parallax[target] += bias(parallax[target])
    else:
        raise ValueError("Unknown offset option")

    return ra, dec, parallax, parallax_err