It is often useful to be able to split a CSV or other large text file into chunks. I found this useful during a recent project, where we needed to pass in a value (the number of lines per file) and split the input into many files.
This kind of script can be called by external programs, e.g. ODI/OWB, as a post-map step.
Full script below (splitfiles.sh):
#!/bin/bash
# Accepts 1 parameter i.e. number of lines per file (transactions)
#######################################
# Print a short usage message and terminate the script.
# Arguments: none
# Outputs:   two usage lines on stderr
# Returns:   never returns; exits the shell with status 1
#######################################
usage()
{
  # Plain ASCII quotes: typographic quotes are not shell quoting characters
  # and would be printed as part of the message.
  printf '%s\n' \
    "Usage : Please supply NoOfTransactions to split file" \
    "Example : splitfiles.sh 100" >&2
  exit 1
}
# ---------------------------------------------------------------------------
# Main script body.
#
# For every file matching FileLocation:
#   1. read its first line (the header row),
#   2. split the remaining lines into chunks of NoTransactions lines, written
#      next to the source as <source>.00, <source>.01, ...,
#   3. start every chunk with the source file's own header row,
#   4. remove the source file once its chunks were written successfully.
#
# Exits with status 1 on the first failure. The source file is never modified
# in place, so a failed run leaves it intact (header included).
# ---------------------------------------------------------------------------

# A failing `tail` must fail the whole `tail | split` pipeline below.
set -o pipefail

#Call Usage if Parameter not passed
[[ $# -eq 0 ]] && usage
#Set Local Variable to Parameter passed
NoTransactions="$1"
# split needs a positive whole number of lines per chunk.
[[ "$NoTransactions" =~ ^[1-9][0-9]*$ ]] || usage

FileLocation="/u01/oradata/*.csv"
PhysicalLocation="/u01/oradata/splitfiles"
SplitFilesExt=".*"
SplitFiles=$FileLocation$SplitFilesExt
echo "SplitFiles : $SplitFiles"

#Change DIR to output directory
# Every path used below is absolute, so the chunks do not depend on the
# working directory; a missing directory still signals a broken environment.
if ! cd "$PhysicalLocation"; then
  echo "Error occured : Unable to change directory to $PhysicalLocation" >&2
  exit 1
fi

#Check files exists
# Expand the glob once into an array. nullglob makes "no match" an empty
# array instead of the literal pattern.
shopt -s nullglob
# FileLocation holds a glob pattern and must be expanded unquoted.
# shellcheck disable=SC2206
SourceFiles=( $FileLocation )
shopt -u nullglob

if (( ${#SourceFiles[@]} == 0 )); then
  echo "No files to process.. exiting.." >&2
  exit 1
fi
echo "Will Split files with each file contain : $NoTransactions Records"

#Split files into x lines
for i in "${SourceFiles[@]}"; do
  echo "Files to process = :$i"

  if [[ ! -f "$i" || ! -r "$i" ]]; then
    echo "Error occured : Unable to read header row from parent file " >&2
    exit 1
  fi

  # A zero-byte source has neither header nor records: nothing to split,
  # it is only removed below.
  if [[ -s "$i" ]]; then
    #Remember Header
    # read returns non-zero when the only line lacks a trailing newline,
    # but still stores the text, so its status is deliberately ignored.
    SPLIT_HEADER_LINE=""
    IFS= read -r SPLIT_HEADER_LINE < "$i" || true
    export SPLIT_HEADER_LINE

    #Split files
    # tail skips the header row; split runs the filter once per chunk with
    # $FILE set to the chunk name, so each chunk begins with the header.
    # Single quotes are intentional: the filter's variables are expanded by
    # the shell that split starts, not by this script.
    # shellcheck disable=SC2016
    if ! tail -n +2 -- "$i" \
      | split -d --lines="$NoTransactions" \
          --filter='{ printf "%s\n" "$SPLIT_HEADER_LINE"; cat; } > "$FILE"' \
          - "$i."; then
      echo "Error occured : split function returned errors " >&2
      exit 1
    fi
  fi

  #Remove Original file
  # Done per file inside the loop so that every processed source is removed,
  # not just the last one. This also covers zero-byte sources.
  if ! rm -- "$i"; then
    echo "Error occured : Unable to remove parent file " >&2
    exit 1
  fi
done