mirror of
https://github.com/sussy-code/smov.git
synced 2025-01-17 01:51:24 +01:00
testing subtitle scraping
This commit is contained in:
parent
a3b64c5105
commit
6395d75d78
3 changed files with 155 additions and 34 deletions
92
src/backend/helpers/subs.ts
Normal file
92
src/backend/helpers/subs.ts
Normal file
|
@ -0,0 +1,92 @@
|
|||
import { proxiedFetch } from "@/backend/helpers/fetch";
|
||||
import { testSubData } from "@/backend/helpers/testsub";
|
||||
import { PlayerMeta } from "@/stores/player/slices/source";
|
||||
import { normalizeTitle } from "@/utils/normalizeTitle";
|
||||
|
||||
/**
 * One entry of the JSON array requested from the OpenSubtitles
 * `suggest.php` endpoint, limited to the fields this module reads.
 */
interface SuggestResult {
  /** Numeric identifier; converted to a string before being returned to callers. */
  id: number;
  /** Media kind; used as the lookup key into `metaTypeToOpenSubs`. */
  kind: "tv" | "movie";
  /** Title of the suggestion; compared after normalization. */
  name: string;
  /** Release year as text; coerced to a number when compared. */
  year: string;
}
|
||||
|
||||
/** A single subtitle entry extracted from the search results page. */
export interface Subtitle {
  /** Language identifier derived from the CSS classes of the row's flag element. */
  language: string;
  /** Subtitle id: the result row's element id with its "name" prefix removed. */
  id: string;
}
|
||||
|
||||
/**
 * Translates the `kind` reported by an OpenSubtitles suggestion into the
 * value it is compared against on the player metadata's `type` field.
 */
const metaTypeToOpenSubs = {
  movie: "movie",
  tv: "show",
} as const;
|
||||
|
||||
/**
 * Looks up the OpenSubtitles id for the given media by querying the
 * `suggest.php` endpoint with the media title.
 *
 * A suggestion is accepted only when its kind maps to `meta.type`, its year
 * equals `meta.releaseYear`, and its normalized name equals the normalized
 * `meta.title`. The first suggestion satisfying all three wins.
 *
 * @param meta - Metadata of the media currently loaded in the player.
 * @returns The matching suggestion's id as a string, or `null` when no
 *   suggestion matches.
 */
export async function getOpenSubsId(meta: PlayerMeta): Promise<string | null> {
  const requestOptions = {
    method: "GET",
    headers: {
      "Alt-Used": "www.opensubtitles.org",
      "X-Referer": "https://www.opensubtitles.org/en/search/subs",
    },
    query: {
      format: "json",
      MovieName: meta.title,
    },
  };
  const suggestions = await proxiedFetch<SuggestResult[]>(
    `https://www.opensubtitles.org/libs/suggest.php`,
    requestOptions
  );

  // Checks run cheapest-first and short-circuit, so titles are only
  // normalized for suggestions whose type and year already agree.
  const match = suggestions.find(
    (candidate) =>
      metaTypeToOpenSubs[candidate.kind] === meta.type &&
      +candidate.year === meta.releaseYear &&
      normalizeTitle(candidate.name) === normalizeTitle(meta.title)
  );

  return match ? match.id.toString() : null;
}
|
||||
|
||||
/**
 * Fetches the OpenSubtitles search results page for a movie id and extracts
 * at most one subtitle per language from the results table.
 *
 * Rows are processed in page order and the first row seen for each language
 * is kept. The page is requested with `sort-6/asc-0` (presumably "by rating,
 * descending" — confirm against the site), which is what would make the
 * kept row the highest rated one.
 *
 * @param id - OpenSubtitles movie id, e.g. as returned by `getOpenSubsId`.
 * @returns One `Subtitle` per distinct language found, in page order.
 * @throws Error when the results table (`#search_results > tbody`) is not
 *   present in the fetched document.
 */
export async function getHighestRatedSubs(id: string): Promise<Subtitle[]> {
  // TODO support episodes
  const document = await proxiedFetch<string>(
    `https://www.opensubtitles.org/en/search/sublanguageid-all/idmovie-${encodeURIComponent(
      id
    )}/sort-6/asc-0`
  );
  const dom = new DOMParser().parseFromString(document, "text/html");
  const table = dom.querySelector("#search_results > tbody");
  if (!table) throw new Error("No result table found");

  const seenLanguages = new Set<string>();
  const output: Subtitle[] = [];

  for (const row of Array.from(table.querySelectorAll("tr[id^='name']"))) {
    // The flag element is looked up in the row's second cell; skip rows that
    // have no such cell or no flag element in it.
    const flagCell = row.children[1];
    const languageFlag = flagCell
      ? flagCell.querySelector("div[class*='flag']")
      : null;
    if (!languageFlag) continue;

    // The language is the class that is NOT the generic "flag" marker
    // (assumes markup like class="flag en" — verify against the live page).
    // Selecting the class equal to "flag" would label every subtitle "flag"
    // and collapse the whole result to a single entry.
    const languageCode = Array.from(languageFlag.classList).find(
      (cssClass) => cssClass !== "flag"
    );
    if (!languageCode) continue;

    if (seenLanguages.has(languageCode)) continue; // no duplicate languages
    seenLanguages.add(languageCode);

    output.push({
      id: row.id.substring(4), // remove "name" from "name<ID>"
      language: languageCode,
    });
  }

  return output;
}
|
||||
|
||||
/**
 * Placeholder for retrieving a subtitle's SRT text.
 *
 * The subtitle id is currently ignored; the bundled test subtitle data is
 * returned for every call.
 *
 * @param _subId - Subtitle id (unused for now).
 * @returns SRT data taken from `testSubData`.
 */
export async function downloadSrt(_subId: string): Promise<string> {
  // TODO download, unzip and return srt data
  const { srtData } = testSubData;
  return srtData;
}
|
||||
|
||||
/**
 * NOTE: none of this works at the moment, because the endpoints used in
 * this file are CF (Cloudflare) protected :(
 */
|
11
src/backend/helpers/testsub.ts
Normal file
11
src/backend/helpers/testsub.ts
Normal file
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
Loading…
Reference in a new issue