Deduplicate(keep=[LastRow], key=[ucPKL], order=[PROCTIME]) -> Calc(select=[ucPKL, ucUpdateTSL, ucVersionL, ucRowTypeL, ucTypeL, listAgentKeyL, buyerAgentKeyL, coListAgentKeyL, coBuyerAgentKeyL, listOfficeKeyL, buyerOfficeKeyL, coListOfficeKeyL, coBuyerOfficeKeyL, dataL, proctime, listAgentKeyL IS NOT NULL AS $f16])
Join(joinType=[LeftOuterJoin], where=[((listAgentKeyL = ucPKA) AND $f16)], select=[ucPKL, ucUpdateTSL, ucVersionL, ucRowTypeL, ucTypeL, listAgentKeyL, buyerAgentKeyL, coListAgentKeyL, coBuyerAgentKeyL, listOfficeKeyL, buyerOfficeKeyL, coListOfficeKeyL, coBuyerOfficeKeyL, dataL, proctime, $f16, ucPKA, ucUpdateTSA, ucVersionA, ucRowTypeA, ucTypeA, dataA, proctime0, row_num], leftInputSpec=[HasUniqueKey], rightInputSpec=[JoinKeyContainsUniqueKey]) -> Calc(select=[ucPKL, ucUpdateTSL, ucVersionL, ucRowTypeL, ucTypeL, listAgentKeyL, buyerAgentKeyL, coListAgentKeyL, coBuyerAgentKeyL, listOfficeKeyL, buyerOfficeKeyL, coListOfficeKeyL, coBuyerOfficeKeyL, dataL, proctime, ucPKA, ucUpdateTSA, ucVersionA, ucRowTypeA, ucTypeA, dataA, proctime0, row_num AS row_num0, buyerAgentKeyL IS NOT NULL AS $f24])
Join(joinType=[LeftOuterJoin], where=[((buyerAgentKeyL = ucPKA0) AND $f24)], select=[ucPKL, ucUpdateTSL, ucVersionL, ucRowTypeL, ucTypeL, listAgentKeyL, buyerAgentKeyL, coListAgentKeyL, coBuyerAgentKeyL, listOfficeKeyL, buyerOfficeKeyL, coListOfficeKeyL, coBuyerOfficeKeyL, dataL, proctime, ucPKA, ucUpdateTSA, ucVersionA, ucRowTypeA, ucTypeA, dataA, proctime0, row_num0, $f24, ucPKA0, ucUpdateTSA0, ucVersionA0, ucRowTypeA0, ucTypeA0, dataA0, proctime1, row_num], leftInputSpec=[HasUniqueKey], rightInputSpec=[JoinKeyContainsUniqueKey]) -> Calc(select=[ucPKL, ucUpdateTSL, ucVersionL, ucRowTypeL, ucTypeL, listAgentKeyL, buyerAgentKeyL, coListAgentKeyL, coBuyerAgentKeyL, listOfficeKeyL, buyerOfficeKeyL, coListOfficeKeyL, coBuyerOfficeKeyL, dataL, proctime, ucPKA, ucUpdateTSA, ucVersionA, ucRowTypeA, ucTypeA, dataA, proctime0, row_num0, ucPKA0, ucUpdateTSA0, ucVersionA0, ucRowTypeA0, ucTypeA0, dataA0, proctime1, row_num AS row_num1, coListAgentKeyL IS NOT NULL AS $f32])
Join(joinType=[LeftOuterJoin], where=[((coListAgentKeyL = ucPKA1) AND $f32)], select=[ucPKL, ucUpdateTSL, ucVersionL, ucRowTypeL, ucTypeL, listAgentKeyL, buyerAgentKeyL, coListAgentKeyL, coBuyerAgentKeyL, listOfficeKeyL, buyerOfficeKeyL, coListOfficeKeyL, coBuyerOfficeKeyL, dataL, proctime, ucPKA, ucUpdateTSA, ucVersionA, ucRowTypeA, ucTypeA, dataA, proctime0, row_num0, ucPKA0, ucUpdateTSA0, ucVersionA0, ucRowTypeA0, ucTypeA0, dataA0, proctime1, row_num1, $f32, ucPKA1, ucUpdateTSA1, ucVersionA1, ucRowTypeA1, ucTypeA1, dataA1, proctime2, row_num], leftInputSpec=[HasUniqueKey], rightInputSpec=[JoinKeyContainsUniqueKey]) -> Calc(select=[ucPKL, ucUpdateTSL, ucVersionL, ucRowTypeL, ucTypeL, listAgentKeyL, buyerAgentKeyL, coListAgentKeyL, coBuyerAgentKeyL, listOfficeKeyL, buyerOfficeKeyL, coListOfficeKeyL, coBuyerOfficeKeyL, dataL, proctime, ucPKA, ucUpdateTSA, ucVersionA, ucRowTypeA, ucTypeA, dataA, proctime0, row_num0, ucPKA0, ucUpdateTSA0, ucVersionA0, ucRowTypeA0, ucTypeA0, dataA0, proctime1, row_num1, ucPKA1, ucUpdateTSA1, ucVersionA1, ucRowTypeA1, ucTypeA1, dataA1, proctime2, row_num AS row_num2, coBuyerAgentKeyL IS NOT NULL AS $f40])
Join(joinType=[LeftOuterJoin], where=[((coBuyerAgentKeyL = ucPKA2) AND $f40)], select=[ucPKL, ucUpdateTSL, ucVersionL, ucRowTypeL, ucTypeL, listAgentKeyL, buyerAgentKeyL, coListAgentKeyL, coBuyerAgentKeyL, listOfficeKeyL, buyerOfficeKeyL, coListOfficeKeyL, coBuyerOfficeKeyL, dataL, proctime, ucPKA, ucUpdateTSA, ucVersionA, ucRowTypeA, ucTypeA, dataA, proctime0, row_num0, ucPKA0, ucUpdateTSA0, ucVersionA0, ucRowTypeA0, ucTypeA0, dataA0, proctime1, row_num1, ucPKA1, ucUpdateTSA1, ucVersionA1, ucRowTypeA1, ucTypeA1, dataA1, proctime2, row_num2, $f40, ucPKA2, ucUpdateTSA2, ucVersionA2, ucRowTypeA2, ucTypeA2, dataA2, proctime3, row_num], leftInputSpec=[HasUniqueKey], rightInputSpec=[JoinKeyContainsUniqueKey]) -> Calc(select=[ucPKL, ucUpdateTSL, ucVersionL, ucRowTypeL, ucTypeL, listAgentKeyL, buyerAgentKeyL, coListAgentKeyL, coBuyerAgentKeyL, listOfficeKeyL, buyerOfficeKeyL, coListOfficeKeyL, coBuyerOfficeKeyL, dataL, proctime, ucPKA, ucUpdateTSA, ucVersionA, ucRowTypeA, ucTypeA, dataA, proctime0, row_num0, ucPKA0, ucUpdateTSA0, ucVersionA0, ucRowTypeA0, ucTypeA0, dataA0, proctime1, row_num1, ucPKA1, ucUpdateTSA1, ucVersionA1, ucRowTypeA1, ucTypeA1, dataA1, proctime2, row_num2, ucPKA2, ucUpdateTSA2, ucVersionA2, ucRowTypeA2, ucTypeA2, dataA2, proctime3, row_num AS row_num3, listOfficeKeyL IS NOT NULL AS $f48])
1. So the number of records received will remain same for both with and without NOT NULL filter. For the scenario with NOT NULL filter, there is an extra field indicating whether the JOIN key is NULL or not.
2. If the above #1 point is correct, then a qq, would this extra field (indicating whether the JOIN key is NULL or not) help in avoiding unnecessary processing for the cases when the JOIN key is null?
3. If the above #2 point is correct, then I should have optimal performance even though there is a data skew.
Please let me know your thoughts?
Thanks,
Eva