// ParsePage_IMDBPeopleBIO
// Purpose: scans the text passed in CombinedHTML and stores what it finds through AddFieldValueXML
//          (the visible writes target the 'altnames' and 'bio' fields).
// Parameters:
//   CombinedHTML      - the text that every Pos/TextBetWeenFirst/HTMLValues search below runs against.
//   ShouldParseBio    - when True, the "Biography" section is searched and parsed.
//   ShouldParseCareer - when True, the first section (profession, alternative names, height, nickname) is parsed.
// Returns: prFinished, or prError when the end of the Biography section cannot be located.
// NOTE(review): this listing looks like it lost the HTML/JSON marker text that belongs inside many string
//   literals (several search arguments are empty '' and some literals are split across lines) - confirm every
//   literal against the real script before relying on it.
// NOTE(review): the final condition "(opBio = 0) or (...) or (opBio <> 0)" holds for every value of opBio,
//   so the 'bio' write at the end is never skipped - confirm the intended rule.
// NOTE(review): the caller excerpt in this listing uses opBio/opCareer as character positions inside
//   PVDConfigOptions, whereas here they are compared with 0 and 1 directly - confirm which meaning is intended.
// NOTE(review): when PersonID is not found, Result is set to prError but execution continues, and the
//   "Result := prFinished" at the end overwrites it.
// NOTE(review): "FinalValue := BirthNameValue" runs before BirthNameValue receives any value in this function.
// NOTE(review): "if (curPos=0) then Exit;" leaves with prFinished before the 'bio' field is written, so the
//   career data already collected in BioList is dropped in that case.
// NOTE(review): only '\u0026' is unescaped in the alternative names; the log in this listing shows \" left in the output.
Function ParsePage_IMDBPeopleBIO(CombinedHTML:String; ShouldParseBio, ShouldParseCareer: Boolean):Cardinal; //BlockOpen //Returns: // Result:=prFinished; Script has finished gathering data // Result:=prError; If any big problem with exit; //Retrieve: ~bio~ Biography from "Mini Bio" IMDB section Var curPos,endPos,debug_pos1:Integer; ItemValue:String; PersonID,ItemValue0,ItemValue10,ItemValue1,ItemValue11:String; ItemList,ItemList00,ItemList0,ItemList1,ItemList11,ItemList12:String; FinalValue: String; ItemList2,ItemList10,ItemList20,ItemValue3:String; BirthNameValue, BioList: String; DeathAge, AltNames1, ItemListBase, ItemListBase1, ItemValueBase, ItemValueBase0, ItemValueBase1:String; IsBioFieldInitiallyEmpty: Boolean; BioFieldEmptyStr: String; Begin LogMessage('ParsePage_IMDBPeopleBIO: Starting processing.'); LogMessage('CombinedHTML length: ' + IntToStr(Length(CombinedHTML))); LogMessage('Function ParsePage_IMDBPeopleBIO BEGIN=====================||'); Result:=prFinished; //It will change to prError if any big problem with exit; //LogMessage('Result set to prFinished'); //Log the initial result setting Result := prFinished; // Check if the 'bio' field is empty IsBioFieldInitiallyEmpty := Length(GetFieldValueXML('bio')) = 0; (* // Convert the Boolean result to a string for logging If IsBioFieldInitiallyEmpty Then BioFieldEmptyStr := 'True' Else BioFieldEmptyStr := 'False'; LogMessage('IsBioFieldEmpty: ' + BioFieldEmptyStr); //*) //First to get data from Base page by parsing FileNameMain If ShouldParseCareer Then Begin //~jobTitle~ (Profession) //Begin of scrap the json container. 
ItemListBase1:=TextBetWeenFirst(CombinedHTML,''); //LogMessage(' Parse results ('+IntToStr(curPos)+','+IntToStr(endPos)+') complex ItemList: '+'', CombinedHTML); LogMessage('curPos after finding Alternative Names curPos: ' + IntToStr(curPos)); If curPos > 0 Then Begin EndPos := curPos; //LogMessage('EndPos set to curPos: ' + IntToStr(EndPos)); // Extract values between the specified tags AltNames1 := HTMLValues(CombinedHTML, '', '"feature_contribution_header":"Contribute to this page"', '{"node":{"displayableProperty":{"value":{"plainText":"', '","__typename":"Markdown"},"__typename":"DisplayableNameAkaProperty"},"__typename":"NameAka"},"', ', ', EndPos); //LogMessage(' * Parsed Result Alternative Name: ' + AltNames1); //AltNames1:=HTMLValues(CombinedHTML,'Alternate Names:','','','','
',EndPos); //AltNames1:=StringReplace(ItemValue1,' | ','
',True,False,True); //AltNames1:=StringReplace(AltNames1,' | ',#13,True,False,True); AltNames1:=StringReplace(AltNames1,'\u0026',#38,True,False,True); If AltNames1 <> '' then AddFieldValueXML('altnames', AltNames1); //*) If AltNames1 <> '' then LogMessage(' Parsed Results All Expanded Alternative Names: ' + AltNames1 + '||'); (*// When Alternative Names wanted in the "comment" field If AltNames1 <> '' then ItemListBase:=ItemListBase+#13#10+'Alternative Names: '+AltNames1 Else ItemList:=ItemListBase+#13#10; LogMessage(' Comment List After alternative Names:'+ItemListBase+'||'); //*) End; //(* //~Height~ curPos:=Pos('

Personal details',CombinedHTML); If curPos>0 Then Begin EndPos:=curPos; ItemValueBase0:=HTMLValues2(CombinedHTML,'','','','
',EndPos); If ItemValueBase0 <> '' then ItemListBase:=ItemListBase+ #13#10 +'Height: '+ItemValueBase0+' '; LogMessage(' Parse Results Height:'+ItemValueBase0+'||'); End; //*) //(* //~Nickname~ curPos:=Pos('','
','
',EndPos); //LogMessage(' * Parse Results Nickname1:'+ItemValueBase0+'||'); //ItemValueBase0:=StringReplace(ItemValueBase0,' See more »','',True,False,True); If ItemValueBase0 <> '' then ItemListBase:=ItemListBase+ #13#10 +'Nickname: '+ItemValueBase0+' '; LogMessage(' Parse Results Nickname:'+ItemValueBase0+'||'); End; //*) //(* //~Nicknames~ curPos:=Pos('','',', ',EndPos); //LogMessage(' * Parse Results Nickname1:'+ItemValueBase0+'||'); //ItemValueBase0:=StringReplace(ItemValueBase0,' See more »','',True,False,True); If ItemValueBase1 <> '' then ItemListBase:=ItemListBase+ #13#10 +'Nickname: '+ItemValueBase1+' '; LogMessage(' Parse Results Nickname:'+ItemValueBase1+'||'); End; //*) End; // List to move to 'bio' field BioList := BioList + ItemListBase + #13#10; LogMessage(' List that is now in Bio field:'+ItemListBase+'||'); //ItemList := ItemListBase; //LogMessage(' List ItemList := ItemListBase;:'+ItemListBase+'||'); If ShouldParseBio Then Begin //Now parsing FileNameBio //(* //Get "Biography" info curPos:=Pos('

Biography

',CombinedHTML); //Strings start which opens the block content data. WEB_SPECIFIC if (curPos=0) then Exit; //*) //(* ItemList2:=''; ItemList11:=''; //*) //(* //Get PersonID //LogMessage('Attempting to find PersonID'); PersonID := TextBetWeenFirst(CombinedHTML, ''); //WEB_SPECIFIC if (Length(PersonID) > 2) then begin ItemList2 := '--------------------------------------------------------------------------'+#13+'Biography Info'; //ItemList2 := '--------------------------------------------------------------------------'+#13+'Biography Info'; LogMessage('Get result PersonID: ' + PersonID + '||'); end else begin LogMessage('Error: PersonID not found'); Result := prError; //Set the result to error if PersonID is not found end; //*) //(* //Get "Biography" info LogMessage('Attempting to find Biography section'); curPos := Pos('
', Copy(CombinedHTML, curPos, Length(CombinedHTML) - curPos + 1)) + curPos - 1; if endPos = curPos - 1 then Begin LogMessage('Error: End of Biography section not found'); Result := prError; //Set the result to error if the section is not found Exit; End; ItemList0 := Copy(CombinedHTML, curPos, endPos - curPos + Length('')); //Include in the end position //LogMessage('Biography section found' + ItemList0); //Extract "Mini bio" Biography text LogMessage('Extracting Mini Bio text:'); curPos := Pos('') - 1; // Extract only the specific phrase FinalValue := Copy(ItemList0, curPos, endPos - curPos + 1); // Log the extracted value for debugging //LogMessage(' * FinalValue Before Cleaning: ' + FinalValue + '||'); // Clean surrounding tags without using RemoveTags FinalValue := StringReplace(FinalValue, '',CombinedHTML,curPos); ItemList00:=Copy(CombinedHTML,curPos,endPos-curPos); //LogMessage(' ** Parse Biography '+#13+ItemList00+' **'); //(* If (Length(ItemList00)>0) Then Begin ItemValue10:=TextBetWeenFirst(ItemList00,'
'); //if BIRTH_NAME_IN_TRANSNAME then //if ItemValue10 <> '' then //AddFieldValueXML('transname',ItemValue10); If ItemValue10 <> '' then //LogMessage(' Get result from Birth Name02:'+ItemValue10+'||'); ItemValue10:='BirthName: '+ItemValue10; If ItemValue10 <> '' then ItemList12:=ItemList12+#13+'--------------------------------------------------------------------------'+#13+ItemValue10; //If FinalValue := 0 then Begin //FinalValue := BirthNameValue; If FinalValue <> '' then FinalValue := '--------------------------------------------------------------------------' + #13#10 + FinalValue else FinalValue := BirthNameValue; If ItemValue10 <> '' then BirthNameValue:='--------------------------------------------------------------------------' + #13#10 + ItemValue10 + #13#10 + '--------------------------------------------------------------------------' else BirthNameValue:='--------------------------------------------------------------------------' End; End; //*) If (opBio = 0) or ((opBio = 1) and IsBioFieldInitiallyEmpty) and (opCareer <> 0) or (opBio <> 0) Then Begin If BIO_INFO_IN_BIO then AddFieldValueXML('bio', ItemList12) Else If Not(BIO_INFO_IN_BIO) and BIO_URL_IN_BIO and Not(IMDB_MINI_IN_BIO) then AddFieldValueXML('bio', BirthNameValue + #13#10 + BioList + #13#10 + ItemList + #13#10 + FinalValue) Else AddFieldValueXML('bio', ItemList11); LogMessage(' Get result from BirthNameValue + #13#10 + BioList + #13#10 + ItemList + #13#10 + FinalValue: '+ BirthNameValue + #13#10 + BioList + #13#10 + ItemList + #13#10 + FinalValue + '||'); Result := prFinished; End; LogMessage('Function ParsePage_IMDBPeopleBIO END=====================||'); End; //BlockClose //*) ............... 
// Parse Biography provider page = BASE_URL_BIO_PERSON-----------------------------------------------------------------------
// Fetches the bio page and the main person page, joins their text, and hands the
// result to ParsePage_IMDBPeopleBIO. A section is skipped only when saved PVD
// options are in use AND its option character is '0'.
If GET_FULL_BIO Then
Begin
  // Report the raw option characters read for bio and career
  LogMessage('Actual opBio value from ini file: ' + Copy(PVDConfigOptions, opBio, 1));
  LogMessage('Actual opCareer value from ini file: ' + Copy(PVDConfigOptions, opCareer, 1));

  // Same truth table as Not(saved and option = '0'), written via De Morgan
  ShouldParseBio := (Not USE_SAVED_PVDCONFIG) Or (Copy(PVDConfigOptions, opBio, 1) <> '0');
  ShouldParseCareer := (Not USE_SAVED_PVDCONFIG) Or (Copy(PVDConfigOptions, opCareer, 1) <> '0');
  LogMessage('ShouldParseBio: ' + BoolToStr(ShouldParseBio));
  LogMessage('ShouldParseCareer: ' + BoolToStr(ShouldParseCareer));

  If ShouldParseBio Or ShouldParseCareer Then
  Begin
    // Bio page goes first in the combined text (true page for parsing)
    DownloadURL := StringReplace(BASE_URL_BIO_PERSON, '%IMDB_ID', PersonID, True, True, False);
    HTML := HTMLToText(DownloadPageBio(FileNameBio));
    CombinedHTML := HTML;

    // Main person page is appended after it
    DownloadURL := BASE_URL_PERSON_PRE_TRUE + PersonID + BASE_URL_SUF;
    HTML := HTMLToText(DownloadPageMain(FileNameMain));
    CombinedHTML := CombinedHTML + HTML;

    ResultTmp := ParsePage_IMDBPeopleBIO(CombinedHTML, ShouldParseBio, ShouldParseCareer);
    // Any outcome other than prFinished is passed up to the caller at once
    If ResultTmp <> prFinished Then
    Begin
      Result := ResultTmp;
      Exit;
    End;
  End;
End;
FROM THE LOG AFTER PARSING: (1/19/2025 11:59:39 PM) Function DownloadPageMain END======================| (1/19/2025 11:59:39 PM) ParsePage_IMDBPeopleBIO: Starting processing. 
(1/19/2025 11:59:39 PM) CombinedHTML length: 1623792 (1/19/2025 11:59:39 PM) Function ParsePage_IMDBPeopleBIO BEGIN=====================|| (1/19/2025 11:59:39 PM) Person -> LoadStatic -> 0ms (1/19/2025 11:59:39 PM) Person -> LoadMultivalues -> 0ms (1/19/2025 11:59:39 PM) Person -> LoadFilms -> 0ms (1/19/2025 11:59:39 PM) Person -> LoadAwards -> 0ms (1/19/2025 11:59:39 PM) Person -> LoadImages -> 0ms (1/19/2025 11:59:39 PM) Get result Profession:Additional Crew, Actor, Director|| (1/19/2025 11:59:40 PM) Parse Results Name12:'Chico' Hernandez|| (1/19/2025 11:59:40 PM) Parse Results Born10:January 2, 1958 in Durango, Mexico|| (1/19/2025 11:59:40 PM) curPos after finding Alternative Names curPos: 1003879 (1/19/2025 11:59:41 PM) Parsed Results All Expanded Alternative Names: Chico Fernandez, Julio Fernandez, Jose Ma. 'Chico' Hernandez Haro, Jose Maria 'Chico' Hernandez Haro, Jose Maria Hernandez Haro, Chico Hernandez, Jose Maria \"Chico\" Hernandez, Jose Maria Hernandez, José Maria 'Chico' Hernández|| (1/19/2025 11:59:41 PM) Parse Results Height:|| (1/19/2025 11:59:41 PM) List that is now in Bio field:Profession: Additional Crew, Actor, Director Additional Filmography Career: Transportation Department, Stunts Name: 'Chico' Hernandez Born: January 2, 1958 in Durango, Mexico|| (1/19/2025 11:59:41 PM) Get result from BioList Profession: Additional Crew, Actor, Director Additional Filmography Career: Transportation Department, Stunts Name: 'Chico' Hernandez Born: January 2, 1958 in Durango, Mexico || (1/19/2025 11:59:41 PM) Get result PersonID: www.imdb.com/name/nm0379491/bio|| (1/19/2025 11:59:41 PM) Attempting to find Biography section (1/19/2025 11:59:41 PM) Extracting Mini Bio text: (1/19/2025 11:59:41 PM) curPos for Mini Bio set to: 716 (1/19/2025 11:59:41 PM) endPos for Mini Bio set to: 1222 (1/19/2025 11:59:41 PM) Get result from BirthNameValue + #13#10 + BioList + #13#10 + ItemList + #13#10 + FinalValue: 
-------------------------------------------------------------------------- Profession: Additional Crew, Actor, Director Additional Filmography Career: Transportation Department, Stunts Name: 'Chico' Hernandez Born: January 2, 1958 in Durango, Mexico 'Chico' Hernandez was born on January 2, 1958 in Durango, Mexico. He is an actor and director, known for Маска Зороа (1998), Мексиканац (2001) and Легенда о Зороу (2005). || (1/19/2025 11:59:41 PM) Function ParsePage_IMDBPeopleBIO END=====================|| (1/19/2025 11:59:41 PM) Function DownloadPageCredit BEGIN======================|