The following Bash shell script looks for files in an Object Store, tests whether they exist, and copies them to the local file system.
#!/bin/bash
# This script will create the LiveApps data tables from the files in the
# StorageContainer sent via Scott/CRC.
#
# Steps:
#   1. Report whether today's two CSV files are present in the current directory.
#   2. List the same two objects in the Swift container (as the hdfs user).
#   3. If the listing produced any output, copy both objects to
#      /data/landing/LIVEAPPS/.
#
# Environment:
#   REPORT_DATE  Optional date stamp (YYYY-MM-DD) used in the file names.
#                Defaults to today. To test against files known to exist in
#                the container, run e.g.:  REPORT_DATE=2018-04-23 ./script.sh
#
# Side effects: writes files.txt in the current directory.
#
# NOTE: all quoting below uses plain ASCII quotes and "su -" uses an ASCII
# hyphen; typographic quotes/dashes are NOT shell syntax and break the script.

CONTAINER=sc-oc-vm-dev-ace-01
DIRECTORY=   # currently unused
REPORT_DATE=${REPORT_DATE:-$(date +%Y-%m-%d)}
PRICINGREPORT="PriceReport_${REPORT_DATE}.csv"
LIVEAPPSDATA="RMSLiveAppsData_${REPORT_DATE}.csv"
FILELINECOUNT=0

echo "Object Storage Container Name :" "$CONTAINER"
echo "LiveApps Pricing Data File :" "$PRICINGREPORT"
echo "LiveApps Data File :" "$LIVEAPPSDATA"
echo "—————————————————————–"
echo ""
echo ""

echo "—-Checking if files exist on local file system —————-"
if [[ -f "$PRICINGREPORT" && -f "$LIVEAPPSDATA" ]]; then
  echo "$PRICINGREPORT" "Found…."
  echo "$LIVEAPPSDATA" "Found…."
  echo " Loading into BigCompute…."
else
  # Informational only: the container check below still runs.
  echo ""
  echo "————-Aborted Data Files not Found—————-"
  echo "$PRICINGREPORT" "..File Not Found"
  echo "$LIVEAPPSDATA" "..File Not Found"
  echo "———————————————————"
fi
echo ""

echo "—— Checking if files exist within Container —————–"
echo "—— must su to hdfs account, otherwise this fails ———-"
# Each listing appends its stdout to files.txt; a missing object contributes
# no lines, so an empty files.txt means neither object was listed.
sudo su - hdfs sh -c "hadoop fs -ls swift://$CONTAINER.default/$PRICINGREPORT" > files.txt
sudo su - hdfs sh -c "hadoop fs -ls swift://$CONTAINER.default/$LIVEAPPSDATA" >> files.txt

FILELINECOUNT=$(wc -l < files.txt)
export FILELINECOUNT
echo "—– FileLineCount is :" "$FILELINECOUNT"

# Numeric comparison so that padded wc output (e.g. "       0") still matches.
if [[ "$FILELINECOUNT" -eq 0 ]]; then
  echo "Container is empty"
  echo "$PRICINGREPORT" "Not Found…."
  echo "$LIVEAPPSDATA" "Not Found…."
else
  echo "Files Exist Load into Hive"
  echo "Copy File from Container to Local File system"
  sudo su - hdfs sh -c "hadoop fs -copyToLocal -crc swift://$CONTAINER.default/$PRICINGREPORT /data/landing/LIVEAPPS/PriceReport.csv"
  sudo su - hdfs sh -c "hadoop fs -copyToLocal -crc swift://$CONTAINER.default/$LIVEAPPSDATA /data/landing/LIVEAPPS/RMSLiveAppsData.csv"
fi
The following script calls other shell scripts and contains the logic that decides whether the ETL continues or stops.
#!/bin/bash
# Parent ETL script which calls the scripts that load and update the more2
# data marts.
#
# Flow:
#   1. Remove old *.txt files and unzip WGL_ALLData.zip in the landing directory.
#   2. Run the V6 / MR2 copy and load scripts, then create the file manifest.
#   3. Gate 1: the number of WGL* files in cleaned/ must equal
#      EXPECTED_FILE_COUNT, otherwise the ETL stops with a warning.
#   4. Gate 2: the output of check_ok.hql must be "0", otherwise the ETL stops
#      with a warning.
#   5. Load mr2_stg and run the mart scripts in order.
#
# Every child script's stdout is written to <script>.log next to the script.
# A failing child script does not stop the run; a warning goes to stderr.
#
# NOTE: all quoting uses plain ASCII quotes and "su -" uses an ASCII hyphen;
# typographic quotes/dashes are NOT shell syntax and break the script.

MR2_DIR=/data/landing/MR2
SCRIPTS_DIR=$MR2_DIR/scripts
EXPECTED_FILE_COUNT=14   # should be 15 in LIVE

declare ERROR_COUNT_VALUE=0
declare FILE_COUNT_VALUE=0

# Runs $SCRIPTS_DIR/<name>.sh with stdout redirected to $SCRIPTS_DIR/<name>.log.
# Arguments: $1 - script name without the .sh extension
# Returns:   always 0; a non-zero child status is reported on stderr only.
run_step() {
  local step=$1
  local rc=0
  "$SCRIPTS_DIR/$step.sh" > "$SCRIPTS_DIR/$step.log" || rc=$?
  if (( rc != 0 )); then
    printf 'Warning: %s.sh exited with status %d\n' "$step" "$rc" >&2
  fi
  return 0
}

# unzip below uses a relative path, so a failed cd must not be ignored.
cd "$MR2_DIR" || { printf 'Cannot cd to %s\n' "$MR2_DIR" >&2; exit 1; }

#Remove old files (-f: no error when there are none)
rm -f -- "$MR2_DIR"/*.txt
#Put this back in for LIVE
#rm -f -- "$MR2_DIR"/cleaned/*.txt

#Unzip compressed file
unzip WGL_ALLData.zip \
  || printf 'Warning: unzip of WGL_ALLData.zip exited with status %d\n' "$?" >&2

#Put this back in for LIVE
#run_step clean_customerdata_v1

#Copy V6 data to HDFS i.e. externalcustomerlinks,externalorderlinks,externalprovenances to RAW then STAGE
run_step copy_v6_hdfs
#Load Data into V6_raw schema
run_step load_v6_raw
#Rebuild V6 externalcustomerlinks,externalorderlinks,externalprovenances in dap_utl schema from v6_raw schema
run_step load_v6_dap_utl
#Copy cleaned files to HDFS
run_step copy_mr2_hdfs
#Load data into MR2_raw schema
run_step load_mr2_raw
#Create manifest file
run_step create_file_manifest

#Check that the expected number of cleaned files exist, if not STOP as not all files exist
FILE_COUNT_FILE=$SCRIPTS_DIR/files_exist.txt
#Remove old file
rm -f -- "$FILE_COUNT_FILE"

# Count WGL* entries with a glob instead of parsing ls output; nullglob is
# enabled only inside the command substitution's subshell.
cleaned_count=$(
  shopt -s nullglob
  matches=("$MR2_DIR"/cleaned/WGL*)
  printf '%s' "${#matches[@]}"
)
printf '%s\n' "$cleaned_count" > "$FILE_COUNT_FILE"

#Check if file count file exists
if [[ -f "$FILE_COUNT_FILE" ]]; then
  FILE_COUNT_VALUE=$(<"$FILE_COUNT_FILE")
  echo "$FILE_COUNT_VALUE" " : Number of Files to Load…"

  if [[ "$FILE_COUNT_VALUE" == "$EXPECTED_FILE_COUNT" ]]; then
    echo "No warnings, all files exist continue ETL"

    #Load Manifest file and compare row counts to mr2_raw
    run_step load_manifest_and_validate

    #Check to see if any Manifest vs RAW row count mismatches exist
    #If there are issues, the script will stop the ETL until the RAW data matches the file manifest
    ERROR_COUNT_FILE=$SCRIPTS_DIR/check_ok.txt
    #Remove old file
    rm -f -- "$ERROR_COUNT_FILE"
    #Check for Warnings
    sudo su - hive sh -c "hive -f /data/landing/MR2/scripts/check_ok.hql" > "$ERROR_COUNT_FILE"

    #Check if error_count file exists
    if [[ -f "$ERROR_COUNT_FILE" ]]; then
      ERROR_COUNT_VALUE=$(<"$ERROR_COUNT_FILE")
      echo "$ERROR_COUNT_VALUE" " : converted Error Count value…"

      #should be 0 in LIVE
      if [[ "$ERROR_COUNT_VALUE" == "0" ]]; then
        echo "No warnings, continue ETL"

        #Load mr2_raw into mr2_stg schema
        run_step load_mr2_stg

        #Load Marts. Execution order is significant and is kept exactly as
        #listed (note 4.1.015 runs before 4.1.013).
        mart_steps=(
          WIG_3.1.001_replicate_more2_orderline
          WIG_3.1.001_crc_orderlines_all
          WIG_3.1.001_wig_cust_ever
          WIG_2.1.001_Populate_fo_lo_to_stg_tables
          WIG_3.1.001_Dedupe_wiggle_customers_v5
          WIG_3.1.001_crc_customerdata
          WIG_2.1.002_Populate_fo_lo_to_stg_tables_CRC_hist
          WIG_3.1.003_Dedupe_Group_Customers
          WIG_3.1.002_Dedupe_CRC_Customers
          WIG_3.1.004_populate_dap_det.customer_account_all
          WIG_3.1.005_customer_account_first_order
          MR2_DELTA_JOB_MRT_4.1.001_hygiene_v7
          MR2_DELTA_JOB_MRT_4.1.004_summary_report_all
          MR2_DELTA_JOB_MRT_4.1.005_gross_to_net
          MR2_DELTA_JOB_MRT_4.1.006_generate_file_analysis_temp_table
          MR2_DELTA_JOB_MRT_4.1.007_File_Analysis
          MR2_DELTA_JOB_MRT_4.1.008_file_analysis_worth
          MR2_DELTA_JOB_MRT_4.1.009_ltv_department
          MR2_DELTA_JOB_MRT_4.1.010_ltv_fov
          MR2_DELTA_JOB_MRT_4.1.011_ltv_source
          MR2_DELTA_JOB_MRT_4.1.012_ltv_brand
          MR2_DELTA_JOB_MRT_4.1.015_merchandise_brand
          MR2_DELTA_JOB_MRT_4.1.013_merchandise_category
          MR2_DELTA_JOB_MRT_4.1.014_repeat_purchase
          MR2_DELTA_JOB_MRT_4.1.016_RFM
          MR2_DELTA_JOB_MRT_4.1.017_group_overlap_report
          MR2_DELTA_JOB_MRT_4.1.018_summary_report_all_group
        )
        for step in "${mart_steps[@]}"; do
          run_step "$step"
        done
      else
        echo "Warnings during ingest of data, terminating ETL, check audit table"
        #echo $ERROR_COUNT_FILE ": Error Count File Exists…"
        #echo $ERROR_COUNT_VALUE " : Error Count value…"
      fi
    fi
  else
    # Message reports the count actually enforced by the check above.
    echo "Warning : Files to load should be ${EXPECTED_FILE_COUNT}. Inconsistent number of source files to load check location /data/landing/MR2/cleaned "
  fi
fi